diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice')
44 files changed, 17654 insertions, 3897 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 2d140ba83781..59544b0fc086 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -13,8 +13,13 @@ ice-y := ice_main.o \ ice_nvm.o \ ice_switch.o \ ice_sched.o \ + ice_base.o \ ice_lib.o \ + ice_txrx_lib.o \ ice_txrx.o \ + ice_flex_pipe.o \ + ice_flow.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o -ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o +ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o +ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 9ee6b55553c0..cb10abb14e11 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -8,6 +8,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/firmware.h> #include <linux/netdevice.h> #include <linux/compiler.h> #include <linux/etherdevice.h> @@ -28,9 +29,13 @@ #include <linux/ip.h> #include <linux/sctp.h> #include <linux/ipv6.h> +#include <linux/pkt_sched.h> #include <linux/if_bridge.h> +#include <linux/ctype.h> +#include <linux/bpf.h> #include <linux/avf/virtchnl.h> #include <net/ipv6.h> +#include <net/xdp_sock.h> #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -40,6 +45,7 @@ #include "ice_sched.h" #include "ice_virtchnl_pf.h" #include "ice_sriov.h" +#include "ice_xsk.h" extern const char ice_drv_ver[]; #define ICE_BAR0 0 @@ -47,33 +53,16 @@ extern const char ice_drv_ver[]; #define ICE_MIN_NUM_DESC 64 #define ICE_MAX_NUM_DESC 8160 #define ICE_DFLT_MIN_RX_DESC 512 -/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the - * number of descriptors to fill up an entire page is greater than or equal to - * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to - * ICE_DFLT_MIN_RX_DESC - */ -#define ICE_DFLT_NUM_RX_DESC \ - min_t(u16, ICE_MAX_NUM_DESC, \ - max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \ - ICE_REQ_DESC_MULTIPLE), \ - ICE_DFLT_MIN_RX_DESC)) -/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC - * and the number of descriptors to fill up an entire page - */ -#define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ - ALIGN(PAGE_SIZE / \ - sizeof(struct ice_tx_desc), \ - ICE_REQ_DESC_MULTIPLE)) +#define ICE_DFLT_NUM_TX_DESC 256 +#define ICE_DFLT_NUM_RX_DESC 2048 #define ICE_DFLT_TRAFFIC_CLASS BIT(0) #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) -#define ICE_ETHTOOL_FWVER_LEN 32 #define ICE_AQ_LEN 64 -#define ICE_MBXQ_LEN 64 +#define ICE_MBXSQ_LEN 64 +#define ICE_MBXRQ_LEN 512 #define ICE_MIN_MSIX 2 #define ICE_NO_VSI 0xffff -#define ICE_MAX_TXQS 2048 -#define ICE_MAX_RXQS 2048 #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 #define ICE_MAX_SCATTER_TXQS 16 @@ -86,16 +75,6 @@ extern const char ice_drv_ver[]; #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256 -#define ICE_MAX_VF_COUNT 256 -#define ICE_MAX_QS_PER_VF 256 -#define ICE_MIN_QS_PER_VF 1 -#define ICE_DFLT_QS_PER_VF 4 -#define ICE_NONQ_VECS_VF 1 -#define ICE_MAX_SCATTER_QS_PER_VF 16 -#define ICE_MAX_BASE_QS_PER_VF 16 -#define ICE_MAX_INTR_PER_VF 65 -#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) -#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) #define ICE_MAX_RESET_WAIT 20 @@ -103,8 +82,7 @@ extern const char ice_drv_ver[]; #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) -#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \ - (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))) +#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD) #define ICE_UP_TABLE_TRANSLATE(val, i) \ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ @@ -152,6 +130,16 @@ extern const char ice_drv_ver[]; ICE_PROMISC_VLAN_TX | \ ICE_PROMISC_VLAN_RX) +#define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) + +struct ice_txq_meta { + u32 q_teid; /* Tx-scheduler element identifier */ + u16 q_id; /* Entry in VSI's txq_map bitmap */ + u16 q_handle; /* Relative index of Tx queue within TC */ + u16 vsi_idx; /* VSI index that Tx queue belongs to */ + u8 tc; /* TC number that Tx queue belongs to */ +}; + struct ice_tc_info { u16 qoffset; u16 qcount_tx; @@ -186,6 +174,8 @@ struct ice_sw { struct ice_pf *pf; u16 sw_id; /* switch ID for this switch */ u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */ + struct ice_vsi *dflt_vsi; /* default VSI for this switch */ + u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ }; enum ice_state { @@ -194,6 +184,7 @@ enum ice_state { __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ + __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ __ICE_PFR_REQ, /* set by driver and peers */ __ICE_CORER_REQ, /* set by driver and peers */ __ICE_GLOBR_REQ, /* set by driver and peers */ @@ -220,6 +211,7 @@ enum ice_state { __ICE_CFG_BUSY, __ICE_SERVICE_SCHED, __ICE_SERVICE_DIS, + __ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ __ICE_STATE_NBITS /* must be last */ }; @@ -257,9 +249,6 @@ struct ice_vsi { u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ - /* Interrupt thresholds */ - u16 work_lmt; - s16 vf_id; /* VF ID for SR-IOV VSIs */ u16 ethtype; /* Ethernet protocol for pause frame */ @@ -288,19 +277,29 @@ struct ice_vsi { u8 current_isup:1; /* Sync 'link up' logging */ u8 stat_offsets_loaded:1; u8 vlan_ena:1; + u16 num_vlan; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ - u16 txq_map[ICE_MAX_TXQS]; /* index in pf->avail_txqs */ - u16 rxq_map[ICE_MAX_RXQS]; /* index in pf->avail_rxqs */ + u16 *txq_map; /* index in pf->avail_txqs */ + u16 *rxq_map; /* index in pf->avail_rxqs */ u16 alloc_txq; /* Allocated Tx queues */ u16 num_txq; /* Used Tx queues */ u16 alloc_rxq; /* Allocated Rx queues */ u16 num_rxq; /* Used Rx queues */ + u16 req_txq; /* User requested Tx queues */ + u16 req_rxq; /* User requested Rx queues */ u16 num_rx_desc; u16 num_tx_desc; struct ice_tc_cfg tc_cfg; + struct bpf_prog *xdp_prog; + struct ice_ring **xdp_rings; /* XDP ring array */ + u16 num_xdp_txq; /* Used XDP queues */ + u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct xdp_umem **xsk_umems; + u16 num_xsk_umems_used; + u16 num_xsk_umems; } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ @@ -329,16 +328,18 @@ struct ice_q_vector { } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { - ICE_FLAG_MSIX_ENA, ICE_FLAG_FLTR_SYNC, ICE_FLAG_RSS_ENA, ICE_FLAG_SRIOV_ENA, ICE_FLAG_SRIOV_CAPABLE, ICE_FLAG_DCB_CAPABLE, ICE_FLAG_DCB_ENA, + ICE_FLAG_ADV_FEATURES, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, - ICE_FLAG_ENABLE_FW_LLDP, + ICE_FLAG_NO_MEDIA, + ICE_FLAG_FW_LLDP_AGENT, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ + ICE_FLAG_LEGACY_RX, ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -363,24 +364,25 @@ struct ice_pf { u16 num_vf_qps; /* num queue pairs per VF */ u16 num_vf_msix; /* num vectors per VF */ DECLARE_BITMAP(state, __ICE_STATE_NBITS); - DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS); - DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); + unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ + unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ unsigned long serv_tmr_period; unsigned long serv_tmr_prev; struct timer_list serv_tmr; struct work_struct serv_task; struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ + struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; u32 hw_csum_rx_error; u32 oicr_idx; /* Other interrupt cause MSIX vector index */ u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ + u16 max_pf_txqs; /* Total Tx queues PF wide */ + u16 max_pf_rxqs; /* Total Rx queues PF wide */ u32 num_lan_msix; /* Total MSIX vectors for base driver */ u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ - u16 q_left_tx; /* remaining num Tx queues left unclaimed */ - u16 q_left_rx; /* remaining num Rx queues left unclaimed */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ u16 num_alloc_vsi; u16 corer_count; /* Core reset count */ @@ -433,21 +435,58 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, } /** - * ice_find_vsi_by_type - Find and return VSI of a given type - * @pf: PF to search for VSI - * @type: Value indicating type of VSI we are looking for + * ice_netdev_to_pf - Retrieve the PF struct associated with a netdev + * @netdev: pointer to the netdev struct + */ +static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return np->vsi->back; +} + +static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) +{ + return !!vsi->xdp_prog; +} + +static inline void ice_set_ring_xdp(struct ice_ring *ring) +{ + ring->flags |= ICE_TX_FLAGS_RING_XDP; +} + +/** + * ice_xsk_umem - get XDP UMEM bound to a ring + * @ring - ring to use + * + * Returns a pointer to xdp_umem structure if there is an UMEM present, + * NULL otherwise. */ -static inline struct ice_vsi * -ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type) +static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring) { - int i; + struct xdp_umem **umems = ring->vsi->xsk_umems; + u16 qid = ring->q_index; + + if (ice_ring_is_xdp(ring)) + qid -= ring->vsi->num_xdp_txq; - for (i = 0; i < pf->num_alloc_vsi; i++) { - struct ice_vsi *vsi = pf->vsi[i]; + if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] || + !ice_is_xdp_ena_vsi(ring->vsi)) + return NULL; - if (vsi && vsi->type == type) - return vsi; - } + return umems[qid]; +} + +/** + * ice_get_main_vsi - Get the PF VSI + * @pf: PF instance + * + * returns pf->vsi[0], which by definition is the PF VSI + */ +static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf) +{ + if (pf->vsi) + return pf->vsi[0]; return NULL; } @@ -455,18 +494,26 @@ ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type) int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); void ice_set_ethtool_ops(struct net_device *netdev); +void ice_set_ethtool_safe_mode_ops(struct net_device *netdev); +u16 ice_get_avail_txq_count(struct ice_pf *pf); +u16 ice_get_avail_rxq_count(struct ice_pf *pf); +int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx); +void ice_update_vsi_stats(struct ice_vsi *vsi); +void ice_update_pf_stats(struct ice_pf *pf); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); +int ice_destroy_xdp_rings(struct ice_vsi *vsi); +int +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); +int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); -#ifdef CONFIG_DCB -int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked); -void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked); -#endif /* CONFIG_DCB */ int ice_open(struct net_device *netdev); int ice_stop(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 765e3c2ed045..6873998cf145 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -33,11 +33,22 @@ struct ice_aqc_get_ver { u8 api_patch; }; +/* Send driver version (indirect 0x0002) */ +struct ice_aqc_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + /* Queue Shutdown (direct 0x0003) */ struct ice_aqc_q_shutdown { - __le32 driver_unloading; + u8 driver_unloading; #define ICE_AQC_DRIVER_UNLOADING BIT(0) - u8 reserved[12]; + u8 reserved[15]; }; /* Request resource ownership (direct 0x0008) @@ -91,6 +102,7 @@ struct ice_aqc_list_caps_elem { #define ICE_AQC_CAPS_SRIOV 0x0012 #define ICE_AQC_CAPS_VF 0x0013 #define ICE_AQC_CAPS_VSI 0x0017 +#define ICE_AQC_CAPS_DCB 0x0018 #define ICE_AQC_CAPS_RSS 0x0040 #define ICE_AQC_CAPS_RXQS 0x0041 #define ICE_AQC_CAPS_TXQS 0x0042 @@ -220,6 +232,13 @@ struct ice_aqc_get_sw_cfg_resp { */ #define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 #define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 +#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID 0x60 +#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM 0x61 + +#define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12) +#define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13) + +#define ICE_AQC_RES_TYPE_FLAG_DEDICATED 0x00 /* Allocate Resources command (indirect 0x0208) * Free Resources command (indirect 0x0209) @@ -730,6 +749,10 @@ struct ice_aqc_add_elem { struct ice_aqc_txsched_elem_data generic[1]; }; +struct ice_aqc_conf_elem { + struct ice_aqc_txsched_elem_data generic[1]; +}; + struct ice_aqc_get_elem { struct ice_aqc_txsched_elem_data generic[1]; }; @@ -771,6 +794,44 @@ struct ice_aqc_port_ets_elem { __le32 tc_node_teid[8]; /* Used for response, reserved in command */ }; +/* Rate limiting profile for + * Add RL profile (indirect 0x0410) + * Query RL profile (indirect 0x0411) + * Remove RL profile (indirect 0x0415) + * These indirect commands acts on single or multiple + * RL profiles with specified data. + */ +struct ice_aqc_rl_profile { + __le16 num_profiles; + __le16 num_processed; /* Only for response. Reserved in Command. */ + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_rl_profile_elem { + u8 level; + u8 flags; +#define ICE_AQC_RL_PROFILE_TYPE_S 0x0 +#define ICE_AQC_RL_PROFILE_TYPE_M (0x3 << ICE_AQC_RL_PROFILE_TYPE_S) +#define ICE_AQC_RL_PROFILE_TYPE_CIR 0 +#define ICE_AQC_RL_PROFILE_TYPE_EIR 1 +#define ICE_AQC_RL_PROFILE_TYPE_SRL 2 +/* The following flag is used for Query RL Profile Data */ +#define ICE_AQC_RL_PROFILE_INVAL_S 0x7 +#define ICE_AQC_RL_PROFILE_INVAL_M (0x1 << ICE_AQC_RL_PROFILE_INVAL_S) + + __le16 profile_id; + __le16 max_burst_size; + __le16 rl_multiply; + __le16 wake_up_calc; + __le16 rl_encode; +}; + +struct ice_aqc_rl_profile_generic_elem { + struct ice_aqc_rl_profile_elem generic[1]; +}; + /* Query Scheduler Resource Allocation (indirect 0x0412) * This indirect command retrieves the scheduler resources allocated by * EMP Firmware to the given PF. @@ -1032,6 +1093,10 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_TOPO_CONFLICT BIT(0) #define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1) #define ICE_AQ_LINK_TOPO_CORRUPT BIT(2) +#define ICE_AQ_LINK_TOPO_UNREACH_PRT BIT(4) +#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT BIT(5) +#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA BIT(6) +#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7) u8 reserved1; u8 link_info; #define ICE_AQ_LINK_UP BIT(0) /* Link Status */ @@ -1135,6 +1200,33 @@ struct ice_aqc_set_port_id_led { u8 rsvd[13]; }; +/* Read/Write SFF EEPROM command (indirect 0x06EE) */ +struct ice_aqc_sff_eeprom { + u8 lport_num; + u8 lport_num_valid; +#define ICE_AQC_SFF_PORT_NUM_VALID BIT(0) + __le16 i2c_bus_addr; +#define ICE_AQC_SFF_I2CBUS_7BIT_M 0x7F +#define ICE_AQC_SFF_I2CBUS_10BIT_M 0x3FF +#define ICE_AQC_SFF_I2CBUS_TYPE_M BIT(10) +#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT 0 +#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT ICE_AQC_SFF_I2CBUS_TYPE_M +#define ICE_AQC_SFF_SET_EEPROM_PAGE_S 11 +#define ICE_AQC_SFF_SET_EEPROM_PAGE_M (0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S) +#define ICE_AQC_SFF_NO_PAGE_CHANGE 0 +#define ICE_AQC_SFF_SET_23_ON_MISMATCH 1 +#define ICE_AQC_SFF_SET_22_ON_MISMATCH 2 +#define ICE_AQC_SFF_IS_WRITE BIT(15) + __le16 i2c_mem_addr; + __le16 eeprom_page; +#define ICE_AQC_SFF_EEPROM_BANK_S 0 +#define ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S) +#define ICE_AQC_SFF_EEPROM_PAGE_S 8 +#define ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S) + __le32 addr_high; + __le32 addr_low; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1518,6 +1610,57 @@ struct ice_aqc_get_clear_fw_log { __le32 addr_low; }; +/* Download Package (indirect 0x0C40) */ +/* Also used for Update Package (indirect 0x0C42) */ +struct ice_aqc_download_pkg { + u8 flags; +#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF 0x01 + u8 reserved[3]; + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_download_pkg_resp { + __le32 error_offset; + __le32 error_info; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get Package Info List (indirect 0x0C43) */ +struct ice_aqc_get_pkg_info_list { + __le32 reserved1; + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + +/* Version format for packages */ +struct ice_pkg_ver { + u8 major; + u8 minor; + u8 update; + u8 draft; +}; + +#define ICE_PKG_NAME_SIZE 32 + +struct ice_aqc_get_pkg_info { + struct ice_pkg_ver ver; + char name[ICE_PKG_NAME_SIZE]; + u8 is_in_nvm; + u8 is_active; + u8 is_active_at_boot; + u8 is_modified; +}; + +/* Get Package Info List response buffer format (0x0C43) */ +struct ice_aqc_get_pkg_info_resp { + __le32 count; + struct ice_aqc_get_pkg_info pkg_info[1]; +}; + /** * struct ice_aq_desc - Admin Queue (AQ) descriptor * @flags: ICE_AQ_FLAG_* flags @@ -1546,6 +1689,7 @@ struct ice_aq_desc { u8 raw[16]; struct ice_aqc_generic generic; struct ice_aqc_get_ver get_ver; + struct ice_aqc_driver_ver driver_ver; struct ice_aqc_q_shutdown q_shutdown; struct ice_aqc_req_res res_owner; struct ice_aqc_manage_mac_read mac_read; @@ -1555,6 +1699,7 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; @@ -1562,6 +1707,7 @@ struct ice_aq_desc { struct ice_aqc_sched_elem_cmd sched_elem_cmd; struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_query_port_ets port_ets; + struct ice_aqc_rl_profile rl_profile; struct ice_aqc_nvm nvm; struct ice_aqc_nvm_checksum nvm_checksum; struct ice_aqc_pf_vf_msg virt; @@ -1579,6 +1725,7 @@ struct ice_aq_desc { struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res; struct ice_aqc_fw_logging fw_logging; struct ice_aqc_get_clear_fw_log get_clear_fw_log; + struct ice_aqc_download_pkg download_pkg; struct ice_aqc_set_mac_lb set_mac_lb; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_set_event_mask set_event_mask; @@ -1610,12 +1757,19 @@ enum ice_aq_err { ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ ICE_AQ_RC_EEXIST = 13, /* Object already exists */ ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */ + ICE_AQ_RC_ENOSYS = 17, /* Function not implemented */ + ICE_AQ_RC_ENOSEC = 24, /* Missing security manifest */ + ICE_AQ_RC_EBADSIG = 25, /* Bad RSA signature */ + ICE_AQ_RC_ESVN = 26, /* SVN number prohibits this package */ + ICE_AQ_RC_EBADMAN = 27, /* Manifest hash mismatch */ + ICE_AQ_RC_EBADBUF = 28, /* Buffer hash mismatches manifest */ }; /* Admin Queue command opcodes */ enum ice_adminq_opc { /* AQ commands */ ice_aqc_opc_get_ver = 0x0001, + ice_aqc_opc_driver_ver = 0x0002, ice_aqc_opc_q_shutdown = 0x0003, /* resource ownership */ @@ -1655,12 +1809,15 @@ enum ice_adminq_opc { /* transmit scheduler commands */ ice_aqc_opc_get_dflt_topo = 0x0400, ice_aqc_opc_add_sched_elems = 0x0401, + ice_aqc_opc_cfg_sched_elems = 0x0403, ice_aqc_opc_get_sched_elems = 0x0404, ice_aqc_opc_suspend_sched_elems = 0x0409, ice_aqc_opc_resume_sched_elems = 0x040A, ice_aqc_opc_query_port_ets = 0x040E, ice_aqc_opc_delete_sched_elems = 0x040F, + ice_aqc_opc_add_rl_profiles = 0x0410, ice_aqc_opc_query_sched_res = 0x0412, + ice_aqc_opc_remove_rl_profiles = 0x0415, /* PHY commands */ ice_aqc_opc_get_phy_caps = 0x0600, @@ -1670,6 +1827,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_event_mask = 0x0613, ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_set_port_id_led = 0x06E9, + ice_aqc_opc_sff_eeprom = 0x06EE, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, @@ -1697,6 +1855,11 @@ enum ice_adminq_opc { ice_aqc_opc_add_txqs = 0x0C30, ice_aqc_opc_dis_txqs = 0x0C31, + /* package commands */ + ice_aqc_opc_download_pkg = 0x0C40, + ice_aqc_opc_update_pkg = 0x0C42, + ice_aqc_opc_get_pkg_info_list = 0x0C43, + /* debug commands */ ice_aqc_opc_fw_logging = 0xFF09, ice_aqc_opc_fw_logging_info = 0xFF10, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c new file mode 100644 index 000000000000..81885efadc7a --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -0,0 +1,846 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_base.h" +#include "ice_dcb_lib.h" + +/** + * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment + * + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap + */ +static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) +{ + int offset, i; + + mutex_lock(qs_cfg->qs_mutex); + offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, + 0, qs_cfg->q_count, 0); + if (offset >= qs_cfg->pf_map_size) { + mutex_unlock(qs_cfg->qs_mutex); + return -ENOMEM; + } + + bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); + for (i = 0; i < qs_cfg->q_count; i++) + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; + mutex_unlock(qs_cfg->qs_mutex); + + return 0; +} + +/** + * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI + * @qs_cfg: gathered variables needed for pf->vsi queues assignment + * + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap + */ +static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) +{ + int i, index = 0; + + mutex_lock(qs_cfg->qs_mutex); + for (i = 0; i < qs_cfg->q_count; i++) { + index = find_next_zero_bit(qs_cfg->pf_map, + qs_cfg->pf_map_size, index); + if (index >= qs_cfg->pf_map_size) + goto err_scatter; + set_bit(index, qs_cfg->pf_map); + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; + } + mutex_unlock(qs_cfg->qs_mutex); + + return 0; +err_scatter: + for (index = 0; index < i; index++) { + clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map); + qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0; + } + mutex_unlock(qs_cfg->qs_mutex); + + return -ENOMEM; +} + +/** + * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled + * @pf: the PF being configured + * @pf_q: the PF queue + * @ena: enable or disable state of the queue + * + * This routine will wait for the given Rx queue of the PF to reach the + * enabled or disabled state. + * Returns -ETIMEDOUT in case of failing to reach the requested state after + * multiple retries; else will return 0 in case of success. + */ +static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) +{ + int i; + + for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) { + if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) & + QRX_CTRL_QENA_STAT_M)) + return 0; + + usleep_range(20, 40); + } + + return -ETIMEDOUT; +} + +/** + * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector + * @vsi: the VSI being configured + * @v_idx: index of the vector in the VSI struct + * + * We allocate one q_vector and set default value for ITR setting associated + * with this q_vector. If allocation fails we return -ENOMEM. + */ +static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + + /* allocate q_vector */ + q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector), + GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + q_vector->vsi = vsi; + q_vector->v_idx = v_idx; + q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; + q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; + if (vsi->type == ICE_VSI_VF) + goto out; + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + + /* This will not be called in the driver load path because the netdev + * will not be created yet. All other cases with register the NAPI + * handler here (i.e. resume, reset/rebuild, etc.) + */ + if (vsi->netdev) + netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, + NAPI_POLL_WEIGHT); + +out: + /* tie q_vector and VSI together */ + vsi->q_vectors[v_idx] = q_vector; + + return 0; +} + +/** + * ice_free_q_vector - Free memory allocated for a specific interrupt vector + * @vsi: VSI having the memory freed + * @v_idx: index of the vector to be freed + */ +static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_q_vector *q_vector; + struct ice_pf *pf = vsi->back; + struct ice_ring *ring; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (!vsi->q_vectors[v_idx]) { + dev_dbg(dev, "Queue vector at index %d not found\n", v_idx); + return; + } + q_vector = vsi->q_vectors[v_idx]; + + ice_for_each_ring(ring, q_vector->tx) + ring->q_vector = NULL; + ice_for_each_ring(ring, q_vector->rx) + ring->q_vector = NULL; + + /* only VSI with an associated netdev is set up with NAPI */ + if (vsi->netdev) + netif_napi_del(&q_vector->napi); + + devm_kfree(dev, q_vector); + vsi->q_vectors[v_idx] = NULL; +} + +/** + * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set + * @hw: board specific structure + */ +static void ice_cfg_itr_gran(struct ice_hw *hw) +{ + u32 regval = rd32(hw, GLINT_CTL); + + /* no need to update global register if ITR gran is already set */ + if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && + (((regval & GLINT_CTL_ITR_GRAN_200_M) >> + GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_100_M) >> + GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_50_M) >> + GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_25_M) >> + GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) + return; + + regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & + GLINT_CTL_ITR_GRAN_200_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & + GLINT_CTL_ITR_GRAN_100_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & + GLINT_CTL_ITR_GRAN_50_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & + GLINT_CTL_ITR_GRAN_25_M); + wr32(hw, GLINT_CTL, regval); +} + +/** + * ice_calc_q_handle - calculate the queue handle + * @vsi: VSI that ring belongs to + * @ring: ring to get the absolute queue index + * @tc: traffic class number + */ +static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) +{ + WARN_ONCE(ice_ring_is_xdp(ring) && tc, + "XDP ring can't belong to TC other than 0"); + + /* Idea here for calculation is that we subtract the number of queue + * count from TC that ring belongs to from it's absolute queue index + * and as a result we get the queue's index within TC. + */ + return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset; +} + +/** + * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance + * @ring: The Tx ring to configure + * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized + * @pf_q: queue index in the PF space + * + * Configure the Tx descriptor ring in TLAN context. + */ +static void +ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + + tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; + + tlan_ctx->port_num = vsi->port_info->lport; + + /* Transmit Queue Length */ + tlan_ctx->qlen = ring->count; + + ice_set_cgd_num(tlan_ctx, ring); + + /* PF number */ + tlan_ctx->pf_num = hw->pf_id; + + /* queue belongs to a specific VSI type + * VF / VM index should be programmed per vmvf_type setting: + * for vmvf_type = VF, it is VF number between 0-256 + * for vmvf_type = VM, it is VM number between 0-767 + * for PF or EMP this field should be set to zero + */ + switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ + case ICE_VSI_PF: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + break; + case ICE_VSI_VF: + /* Firmware expects vmvf_num to be absolute VF ID */ + tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; + break; + default: + return; + } + + /* make sure the context is associated with the right VSI */ + tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + + tlan_ctx->tso_ena = ICE_TX_LEGACY; + tlan_ctx->tso_qnum = pf_q; + + /* Legacy or Advanced Host Interface: + * 0: Advanced Host Interface + * 1: Legacy Host Interface + */ + tlan_ctx->legacy_int = ICE_TX_LEGACY; +} + +/** + * ice_setup_rx_ctx - Configure a receive ring context + * @ring: The Rx ring to configure + * + * Configure the Rx descriptor ring in RLAN context. + */ +int ice_setup_rx_ctx(struct ice_ring *ring) +{ + int chain_len = ICE_MAX_CHAINED_RX_BUFS; + struct ice_vsi *vsi = ring->vsi; + u32 rxdid = ICE_RXDID_FLEX_NIC; + struct ice_rlan_ctx rlan_ctx; + struct ice_hw *hw; + u32 regval; + u16 pf_q; + int err; + + hw = &vsi->back->hw; + + /* what is Rx queue number in global space of 2K Rx queues */ + pf_q = vsi->rxq_map[ring->q_index]; + + /* clear the context structure first */ + memset(&rlan_ctx, 0, sizeof(rlan_ctx)); + + ring->rx_buf_len = vsi->rx_buf_len; + + if (ring->vsi->type == ICE_VSI_PF) { + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ + xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index); + + ring->xsk_umem = ice_xsk_umem(ring); + if (ring->xsk_umem) { + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr - + XDP_PACKET_HEADROOM; + /* For AF_XDP ZC, we disallow packets to span on + * multiple buffers, thus letting us skip that + * handling in the fast-path. + */ + chain_len = 1; + ring->zca.free = ice_zca_free; + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &ring->zca); + if (err) + return err; + + dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", + ring->q_index); + } else { + ring->zca.free = NULL; + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ + xdp_rxq_info_reg(&ring->xdp_rxq, + ring->netdev, + ring->q_index); + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; + } + } + /* Receive Queue Base Address. + * Indicates the starting address of the descriptor queue defined in + * 128 Byte units. + */ + rlan_ctx.base = ring->dma >> 7; + + rlan_ctx.qlen = ring->count; + + /* Receive Packet Data Buffer Size. + * The Packet Data Buffer Size is defined in 128 byte units. + */ + rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + + /* use 32 byte descriptors */ + rlan_ctx.dsize = 1; + + /* Strip the Ethernet CRC bytes before the packet is posted to host + * memory. + */ + rlan_ctx.crcstrip = 1; + + /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ + rlan_ctx.l2tsel = 1; + + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; + + /* This controls whether VLAN is stripped from inner headers + * The VLAN in the inner L2 header is stripped to the receive + * descriptor if enabled by this flag. + */ + rlan_ctx.showiv = 0; + + /* Max packet size for this queue - must not be set to a larger value + * than 5 x DBUF + */ + rlan_ctx.rxmax = min_t(u16, vsi->max_frame, + chain_len * ring->rx_buf_len); + + /* Rx queue threshold in units of 64 */ + rlan_ctx.lrxqthresh = 1; + + /* Enable Flexible Descriptors in the queue context which + * allows this driver to select a specific receive descriptor format + */ + if (vsi->type != ICE_VSI_VF) { + regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); + regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & + QRXFLXP_CNTXT_RXDID_IDX_M; + + /* increasing context priority to pick up profile ID; + * default is 0x01; setting to 0x03 to ensure profile + * is programming if prev context is of same priority + */ + regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & + QRXFLXP_CNTXT_RXDID_PRIO_M; + + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); + } + + /* Absolute queue number out of 2K needs to be passed */ + err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + pf_q, err); + return -EIO; + } + + if (vsi->type == ICE_VSI_VF) + return 0; + + /* configure Rx buffer alignment */ + if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + ice_clear_ring_build_skb_ena(ring); + else + ice_set_ring_build_skb_ena(ring); + + /* init queue specific tail register */ + ring->tail = hw->hw_addr + QRX_TAIL(pf_q); + writel(0, ring->tail); + + err = ring->xsk_umem ? + ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) : + ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); + if (err) + dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", + ring->xsk_umem ? "UMEM enabled " : "", + ring->q_index, pf_q); + + return 0; +} + +/** + * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI + * @qs_cfg: gathered variables needed for pf->vsi queues assignment + * + * This function first tries to find contiguous space. If it is not successful, + * it tries with the scatter approach. + * + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap + */ +int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) +{ + int ret = 0; + + ret = __ice_vsi_get_qs_contig(qs_cfg); + if (ret) { + /* contig failed, so try with scatter approach */ + qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; + qs_cfg->q_count = min_t(u16, qs_cfg->q_count, + qs_cfg->scatter_count); + ret = __ice_vsi_get_qs_sc(qs_cfg); + } + return ret; +} + +/** + * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring + * @vsi: the VSI being configured + * @ena: start or stop the Rx rings + * @rxq_idx: Rx queue index + */ +int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) +{ + int pf_q = vsi->rxq_map[rxq_idx]; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int ret = 0; + u32 rx_reg; + + rx_reg = rd32(hw, QRX_CTRL(pf_q)); + + /* Skip if the queue is already in the requested state */ + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + return 0; + + /* turn on/off the queue */ + if (ena) + rx_reg |= QRX_CTRL_QENA_REQ_M; + else + rx_reg &= ~QRX_CTRL_QENA_REQ_M; + wr32(hw, QRX_CTRL(pf_q), rx_reg); + + /* wait for the change to finish */ + ret = ice_pf_rxq_wait(pf, pf_q, ena); + if (ret) + dev_err(ice_pf_to_dev(pf), "VSI idx %d Rx ring %d %sable timeout\n", + vsi->idx, pf_q, (ena ? "en" : "dis")); + + return ret; +} + +/** + * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors + * @vsi: the VSI being configured + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + int v_idx, err; + + if (vsi->q_vectors[0]) { + dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num); + return -EEXIST; + } + + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) { + err = ice_vsi_alloc_q_vector(vsi, v_idx); + if (err) + goto err_out; + } + + return 0; + +err_out: + while (v_idx--) + ice_free_q_vector(vsi, v_idx); + + dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n", + vsi->num_q_vectors, vsi->vsi_num, err); + vsi->num_q_vectors = 0; + return err; +} + +/** + * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors + * @vsi: the VSI being configured + * + * This function maps descriptor rings to the queue-specific vectors allotted + * through the MSI-X enabling code. On a constrained vector budget, we map Tx + * and Rx rings to the vector as "efficiently" as possible. + */ +void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) +{ + int q_vectors = vsi->num_q_vectors; + int tx_rings_rem, rx_rings_rem; + int v_id; + + /* initially assigning remaining rings count to VSIs num queue value */ + tx_rings_rem = vsi->num_txq; + rx_rings_rem = vsi->num_rxq; + + for (v_id = 0; v_id < q_vectors; v_id++) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; + int tx_rings_per_v, rx_rings_per_v, q_id, q_base; + + /* Tx rings mapping to vector */ + tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); + q_vector->num_ring_tx = tx_rings_per_v; + q_vector->tx.ring = NULL; + q_vector->tx.itr_idx = ICE_TX_ITR; + q_base = vsi->num_txq - tx_rings_rem; + + for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { + struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + + tx_ring->q_vector = q_vector; + tx_ring->next = q_vector->tx.ring; + q_vector->tx.ring = tx_ring; + } + tx_rings_rem -= tx_rings_per_v; + + /* Rx rings mapping to vector */ + rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); + q_vector->num_ring_rx = rx_rings_per_v; + q_vector->rx.ring = NULL; + q_vector->rx.itr_idx = ICE_RX_ITR; + q_base = vsi->num_rxq - rx_rings_rem; + + for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { + struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + q_vector->rx.ring = rx_ring; + } + rx_rings_rem -= rx_rings_per_v; + } +} + +/** + * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors + * @vsi: the VSI having memory freed + */ +void ice_vsi_free_q_vectors(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) + ice_free_q_vector(vsi, v_idx); +} + +/** + * ice_vsi_cfg_txq - Configure single Tx queue + * @vsi: the VSI that queue belongs to + * @ring: Tx ring to be configured + * @qg_buf: queue group buffer + */ +int +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_aqc_add_tx_qgrp *qg_buf) +{ + struct ice_tlan_ctx tlan_ctx = { 0 }; + struct ice_aqc_add_txqs_perq *txq; + struct ice_pf *pf = vsi->back; + u8 buf_len = sizeof(*qg_buf); + enum ice_status status; + u16 pf_q; + u8 tc; + + pf_q = ring->reg_idx; + ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); + /* copy context contents into the qg_buf */ + qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); + ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_tlan_ctx_info); + + /* init queue specific tail reg. It is referred as + * transmit comm scheduler queue doorbell. + */ + ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); + + if (IS_ENABLED(CONFIG_DCB)) + tc = ring->dcb_tc; + else + tc = 0; + + /* Add unique software queue handle of the Tx queue per + * TC into the VSI Tx ring + */ + ring->q_handle = ice_calc_q_handle(vsi, ring, tc); + + status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, + 1, qg_buf, buf_len, NULL); + if (status) { + dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n", + status); + return -ENODEV; + } + + /* Add Tx Queue TEID into the VSI Tx ring from the + * response. This will complete configuring and + * enabling the queue. + */ + txq = &qg_buf->txqs[0]; + if (pf_q == le16_to_cpu(txq->txq_id)) + ring->txq_teid = le32_to_cpu(txq->q_teid); + + return 0; +} + +/** + * ice_cfg_itr - configure the initial interrupt throttle values + * @hw: pointer to the HW structure + * @q_vector: interrupt vector that's being configured + * + * Configure interrupt throttling values for the ring containers that are + * associated with the interrupt vector passed in. + */ +void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + ice_cfg_itr_gran(hw); + + if (q_vector->num_ring_rx) { + struct ice_ring_container *rc = &q_vector->rx; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); + } + + if (q_vector->num_ring_tx) { + struct ice_ring_container *rc = &q_vector->tx; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); + } +} + +/** + * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * @vsi: the VSI being configured + * @txq: Tx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector + * within the function space. + */ +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + + val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + if (ice_is_xdp_ena_vsi(vsi)) { + u32 xdp_txq = txq + vsi->num_xdp_txq; + + wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), + val); + } + ice_flush(hw); +} + +/** + * ice_cfg_rxq_interrupt - configure interrupt on Rx queue + * @vsi: the VSI being configured + * @rxq: Rx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector + * within the function space. + */ +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; + + val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); + + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + + ice_flush(hw); +} + +/** + * ice_trigger_sw_intr - trigger a software interrupt + * @hw: pointer to the HW structure + * @q_vector: interrupt vector to trigger the software interrupt for + */ +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | + GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_INTENA_M); +} + +/** + * ice_vsi_stop_tx_ring - Disable single Tx ring + * @vsi: the VSI being configured + * @rst_src: reset source + * @rel_vmvf_num: Relative ID of VF/VM + * @ring: Tx ring to be stopped + * @txq_meta: Meta data of Tx ring to be stopped + */ +int +ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num, struct ice_ring *ring, + struct ice_txq_meta *txq_meta) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u32 val; + + /* clear cause_ena bit for disabled queues */ + val = rd32(hw, QINT_TQCTL(ring->reg_idx)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(ring->reg_idx), val); + + /* software is expected to wait for 100 ns */ + ndelay(100); + + /* trigger a software interrupt for the vector + * associated to the queue to schedule NAPI handler + */ + q_vector = ring->q_vector; + if (q_vector) + ice_trigger_sw_intr(hw, q_vector); + + status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx, + txq_meta->tc, 1, &txq_meta->q_handle, + &txq_meta->q_id, &txq_meta->q_teid, rst_src, + rel_vmvf_num, NULL); + + /* if the disable queue command was exercised during an + * active reset flow, ICE_ERR_RESET_ONGOING is returned. + * This is not an error as the reset operation disables + * queues at the hardware level anyway. + */ + if (status == ICE_ERR_RESET_ONGOING) { + dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n"); + } else if (status == ICE_ERR_DOES_NOT_EXIST) { + dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); + } else if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n", + status); + return -ENODEV; + } + + return 0; +} + +/** + * ice_fill_txq_meta - Prepare the Tx queue's meta data + * @vsi: VSI that ring belongs to + * @ring: ring that txq_meta will be based on + * @txq_meta: a helper struct that wraps Tx queue's information + * + * Set up a helper struct that will contain all the necessary fields that + * are needed for stopping Tx queue + */ +void +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_txq_meta *txq_meta) +{ + u8 tc; + + if (IS_ENABLED(CONFIG_DCB)) + tc = ring->dcb_tc; + else + tc = 0; + + txq_meta->q_id = ring->reg_idx; + txq_meta->q_teid = ring->txq_teid; + txq_meta->q_handle = ring->q_handle; + txq_meta->vsi_idx = vsi->idx; + txq_meta->tc = tc; +} diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h new file mode 100644 index 000000000000..407995e8e944 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_BASE_H_ +#define _ICE_BASE_H_ + +#include "ice.h" + +int ice_setup_rx_ctx(struct ice_ring *ring); +int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg); +int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx); +int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi); +void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); +void ice_vsi_free_q_vectors(struct ice_vsi *vsi); +int +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_aqc_add_tx_qgrp *qg_buf); +void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector); +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); +int +ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num, struct ice_ring *ring, + struct ice_txq_meta *txq_meta); +void +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_txq_meta *txq_meta); +#endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2e0731c1e1a3..04d5db0a25bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4,28 +4,10 @@ #include "ice_common.h" #include "ice_sched.h" #include "ice_adminq_cmd.h" +#include "ice_flow.h" #define ICE_PF_RESET_WAIT_COUNT 200 -#define ICE_PROG_FLEX_ENTRY(hw, rxdid, mdid, idx) \ - wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(rxdid), \ - ((ICE_RX_OPC_MDID << \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \ - (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M)) - -#define ICE_PROG_FLG_ENTRY(hw, rxdid, flg_0, flg_1, flg_2, flg_3, idx) \ - wr32((hw), GLFLXP_RXDID_FLAGS(rxdid, idx), \ - (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \ - (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \ - (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \ - (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M)) - /** * ice_set_mac_type - Sets MAC type * @hw: pointer to the HW structure @@ -43,20 +25,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) } /** - * ice_dev_onetime_setup - Temporary HW/FW workarounds - * @hw: pointer to the HW structure - * - * This function provides temporary workarounds for certain issues - * that are expected to be fixed in the HW/FW. - */ -void ice_dev_onetime_setup(struct ice_hw *hw) -{ -#define MBX_PF_VT_PFALLOC 0x00231E80 - /* set VFs per PF */ - wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF)); -} - -/** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure * @@ -263,21 +231,23 @@ enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd) { - struct ice_link_status *hw_link_info_old, *hw_link_info; struct ice_aqc_get_link_status_data link_data = { 0 }; struct ice_aqc_get_link_status *resp; + struct ice_link_status *li_old, *li; enum ice_media_type *hw_media_type; struct ice_fc_info *hw_fc_info; bool tx_pause, rx_pause; struct ice_aq_desc desc; enum ice_status status; + struct ice_hw *hw; u16 cmd_flags; if (!pi) return ICE_ERR_PARAM; - hw_link_info_old = &pi->phy.link_info_old; + hw = pi->hw; + li_old = &pi->phy.link_info_old; hw_media_type = &pi->phy.media_type; - hw_link_info = &pi->phy.link_info; + li = &pi->phy.link_info; hw_fc_info = &pi->fc; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status); @@ -286,27 +256,27 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, resp->cmd_flags = cpu_to_le16(cmd_flags); resp->lport_num = pi->lport; - status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data), - cd); + status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd); if (status) return status; /* save off old link status information */ - *hw_link_info_old = *hw_link_info; + *li_old = *li; /* update current link status information */ - hw_link_info->link_speed = le16_to_cpu(link_data.link_speed); - hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low); - hw_link_info->phy_type_high = le64_to_cpu(link_data.phy_type_high); + li->link_speed = le16_to_cpu(link_data.link_speed); + li->phy_type_low = le64_to_cpu(link_data.phy_type_low); + li->phy_type_high = le64_to_cpu(link_data.phy_type_high); *hw_media_type = ice_get_media_type(pi); - hw_link_info->link_info = link_data.link_info; - hw_link_info->an_info = link_data.an_info; - hw_link_info->ext_info = link_data.ext_info; - hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size); - hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK; - hw_link_info->topo_media_conflict = link_data.topo_media_conflict; - hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M; + li->link_info = link_data.link_info; + li->an_info = link_data.an_info; + li->ext_info = link_data.ext_info; + li->max_frame_size = le16_to_cpu(link_data.max_frame_size); + li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK; + li->topo_media_conflict = link_data.topo_media_conflict; + li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M | + ICE_AQ_CFG_PACING_TYPE_M); /* update fc info */ tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX); @@ -320,12 +290,24 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, else hw_fc_info->current_mode = ICE_FC_NONE; - hw_link_info->lse_ena = - !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED)); + li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED)); + + ice_debug(hw, ICE_DBG_LINK, "link_speed = 0x%x\n", li->link_speed); + ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n", + (unsigned long long)li->phy_type_low); + ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n", + (unsigned long long)li->phy_type_high); + ice_debug(hw, ICE_DBG_LINK, "media_type = 0x%x\n", *hw_media_type); + ice_debug(hw, ICE_DBG_LINK, "link_info = 0x%x\n", li->link_info); + ice_debug(hw, ICE_DBG_LINK, "an_info = 0x%x\n", li->an_info); + ice_debug(hw, ICE_DBG_LINK, "ext_info = 0x%x\n", li->ext_info); + ice_debug(hw, ICE_DBG_LINK, "lse_ena = 0x%x\n", li->lse_ena); + ice_debug(hw, ICE_DBG_LINK, "max_frame = 0x%x\n", li->max_frame_size); + ice_debug(hw, ICE_DBG_LINK, "pacing = 0x%x\n", li->pacing); /* save link status information */ if (link) - *link = *hw_link_info; + *link = *li; /* flag cleared so calling functions don't call AQ again */ pi->phy.get_link_info = false; @@ -334,88 +316,6 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, } /** - * ice_init_flex_flags - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder profile ID - * - * Function to initialize Rx flex flags - */ -static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - u8 idx = 0; - - /* Flex-flag fields (0-2) are programmed with FLG64 bits with layout: - * flexiflags0[5:0] - TCP flags, is_packet_fragmented, is_packet_UDP_GRE - * flexiflags1[3:0] - Not used for flag programming - * flexiflags2[7:0] - Tunnel and VLAN types - * 2 invalid fields in last index - */ - switch (prof_id) { - /* Rx flex flags are currently programmed for the NIC profiles only. - * Different flag bit programming configurations can be added per - * profile as needed. - */ - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG, - ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI, - ICE_FLG_FIN, idx++); - /* flex flag 1 is not used for flexi-flag programming, skipping - * these four FLG64 bits. - */ - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI, - ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100, - ICE_FLG_EVLAN_x9100, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100, - ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC, - ICE_FLG_TNL0, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Flag programming for profile ID %d not supported\n", - prof_id); - } -} - -/** - * ice_init_flex_flds - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder profile ID - * - * Function to initialize flex descriptors - */ -static void ice_init_flex_flds(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - enum ice_flex_rx_mdid mdid; - - switch (prof_id) { - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_LOW, 0); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_HIGH, 1); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_FLOW_ID_LOWER, 2); - - mdid = (prof_id == ICE_RXDID_FLEX_NIC_2) ? - ICE_RX_MDID_SRC_VSI : ICE_RX_MDID_FLOW_ID_HIGH; - - ICE_PROG_FLEX_ENTRY(hw, prof_id, mdid, 3); - - ice_init_flex_flags(hw, prof_id); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Field init for profile ID %d not supported\n", - prof_id); - } -} - -/** * ice_init_fltr_mgmt_struct - initializes filter management list and locks * @hw: pointer to the HW struct */ @@ -688,10 +588,10 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf) } /** - * ice_get_itr_intrl_gran - determine int/intrl granularity + * ice_get_itr_intrl_gran * @hw: pointer to the HW struct * - * Determines the ITR/intrl granularities based on the maximum aggregate + * Determines the ITR/INTRL granularities based on the maximum aggregate * bandwidth according to the device's configuration during power-on. */ static void ice_get_itr_intrl_gran(struct ice_hw *hw) @@ -715,6 +615,29 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw) } /** + * ice_get_nvm_version - get cached NVM version data + * @hw: pointer to the hardware structure + * @oem_ver: 8 bit NVM version + * @oem_build: 16 bit NVM build number + * @oem_patch: 8 NVM patch number + * @ver_hi: high 16 bits of the NVM version + * @ver_lo: low 16 bits of the NVM version + */ +void +ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build, + u8 *oem_patch, u8 *ver_hi, u8 *ver_lo) +{ + struct ice_nvm_info *nvm = &hw->nvm; + + *oem_ver = (u8)((nvm->oem_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT); + *oem_patch = (u8)(nvm->oem_ver & ICE_OEM_VER_PATCH_MASK); + *oem_build = (u16)((nvm->oem_ver & ICE_OEM_VER_BUILD_MASK) >> + ICE_OEM_VER_BUILD_SHIFT); + *ver_hi = (nvm->ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; + *ver_lo = (nvm->ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; +} + +/** * ice_init_hw - main hardware initialization routine * @hw: pointer to the hardware structure */ @@ -740,7 +663,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw) ice_get_itr_intrl_gran(hw); - status = ice_init_all_ctrlq(hw); + status = ice_create_all_ctrlq(hw); if (status) goto err_unroll_cqinit; @@ -818,13 +741,14 @@ enum ice_status ice_init_hw(struct ice_hw *hw) goto err_unroll_sched; } INIT_LIST_HEAD(&hw->agg_list); + /* Initialize max burst size */ + if (!hw->max_burst_size) + ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE); status = ice_init_fltr_mgmt_struct(hw); if (status) goto err_unroll_sched; - ice_dev_onetime_setup(hw); - /* Get MAC information */ /* A single port can report up to two (LAN and WoL) addresses */ mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, @@ -842,10 +766,9 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; - - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC); - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC_2); - + status = ice_init_hw_tbls(hw); + if (status) + goto err_unroll_fltr_mgmt_struct; return 0; err_unroll_fltr_mgmt_struct: @@ -855,7 +778,7 @@ err_unroll_sched: err_unroll_alloc: devm_kfree(ice_hw_to_dev(hw), hw->port_info); err_unroll_cqinit: - ice_shutdown_all_ctrlq(hw); + ice_destroy_all_ctrlq(hw); return status; } @@ -873,6 +796,8 @@ void ice_deinit_hw(struct ice_hw *hw) ice_sched_cleanup_all(hw); ice_sched_clear_agg(hw); + ice_free_seg(hw); + ice_free_hw_tbls(hw); if (hw->port_info) { devm_kfree(ice_hw_to_dev(hw), hw->port_info); @@ -881,7 +806,7 @@ void ice_deinit_hw(struct ice_hw *hw) /* Attempt to disable FW logging before shutting down control queues */ ice_cfg_fw_log(hw, false); - ice_shutdown_all_ctrlq(hw); + ice_destroy_all_ctrlq(hw); /* Clear VSI contexts if not already cleared */ ice_clear_all_vsi_ctx(hw); @@ -893,7 +818,7 @@ void ice_deinit_hw(struct ice_hw *hw) */ enum ice_status ice_check_reset(struct ice_hw *hw) { - u32 cnt, reg = 0, grst_delay; + u32 cnt, reg = 0, grst_delay, uld_mask; /* Poll for Device Active state in case a recent CORER, GLOBR, * or EMPR has occurred. The grst delay value is in 100ms units. @@ -915,13 +840,20 @@ enum ice_status ice_check_reset(struct ice_hw *hw) return ICE_ERR_RESET_FAILED; } -#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \ - GLNVM_ULD_GLOBR_DONE_M) +#define ICE_RESET_DONE_MASK (GLNVM_ULD_PCIER_DONE_M |\ + GLNVM_ULD_PCIER_DONE_1_M |\ + GLNVM_ULD_CORER_DONE_M |\ + GLNVM_ULD_GLOBR_DONE_M |\ + GLNVM_ULD_POR_DONE_M |\ + GLNVM_ULD_POR_DONE_1_M |\ + GLNVM_ULD_PCIER_DONE_2_M) + + uld_mask = ICE_RESET_DONE_MASK; /* Device is Active; check Global Reset processes are done */ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { - reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK; - if (reg == ICE_RESET_DONE_MASK) { + reg = rd32(hw, GLNVM_ULD) & uld_mask; + if (reg == uld_mask) { ice_debug(hw, ICE_DBG_INIT, "Global reset processes done. %d\n", cnt); break; @@ -1026,6 +958,72 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) } /** + * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA + * @hw: pointer to hardware structure + * @module_tlv: pointer to module TLV to return + * @module_tlv_len: pointer to module TLV length to return + * @module_type: module type requested + * + * Finds the requested sub module TLV type from the Preserved Field + * Area (PFA) and returns the TLV pointer and length. The caller can + * use these to read the variable length TLV value. + */ +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type) +{ + enum ice_status status; + u16 pfa_len, pfa_ptr; + u16 next_tlv; + + status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n"); + return status; + } + status = ice_read_sr_word(hw, pfa_ptr, &pfa_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); + return status; + } + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; + while (next_tlv < pfa_ptr + pfa_len) { + u16 tlv_sub_module_type; + u16 tlv_len; + + /* Read TLV type */ + status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n"); + break; + } + /* Read TLV length */ + status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n"); + break; + } + if (tlv_sub_module_type == module_type) { + if (tlv_len) { + *module_tlv = next_tlv; + *module_tlv_len = tlv_len; + return 0; + } + return ICE_ERR_INVAL_SIZE; + } + /* Check next TLV, i.e. current TLV pointer + length + 2 words + * (for current TLV's type and length) + */ + next_tlv = next_tlv + tlv_len + 2; + } + /* Module does not exist */ + return ICE_ERR_DOES_NOT_EXIST; +} + +/** * ice_copy_rxq_ctx_to_hw * @hw: pointer to the hardware structure * @ice_rxq_ctx: pointer to the rxq context @@ -1078,6 +1076,7 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena, 1, 195), ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196), ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198), + ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201), { 0 } }; @@ -1088,7 +1087,8 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { * @rxq_index: the index of the Rx queue * * Converts rxq context from sparse to dense structure and then writes - * it to HW register space + * it to HW register space and enables the hardware to prefetch descriptors + * instead of only fetching them on demand */ enum ice_status ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, @@ -1096,6 +1096,11 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, { u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; + if (!rlan_ctx) + return ICE_ERR_BAD_PTR; + + rlan_ctx->prefena = 1; + ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); } @@ -1111,6 +1116,7 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78), ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80), ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90), + ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag, 1, 91), ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92), ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93), ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101), @@ -1129,62 +1135,18 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), - ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 110, 171), + ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171), { 0 } }; -/** - * ice_debug_cq - * @hw: pointer to the hardware structure - * @mask: debug mask - * @desc: pointer to control queue descriptor - * @buf: pointer to command buffer - * @buf_len: max length of buf - * - * Dumps debug log about control command with descriptor contents. - */ -void -ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf, - u16 buf_len) -{ - struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; - u16 len; - -#ifndef CONFIG_DYNAMIC_DEBUG - if (!(mask & hw->debug_mask)) - return; -#endif - - if (!desc) - return; - - len = le16_to_cpu(cq_desc->datalen); - - ice_debug(hw, mask, - "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", - le16_to_cpu(cq_desc->opcode), - le16_to_cpu(cq_desc->flags), - le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); - ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->cookie_high), - le32_to_cpu(cq_desc->cookie_low)); - ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->params.generic.param0), - le32_to_cpu(cq_desc->params.generic.param1)); - ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->params.generic.addr_high), - le32_to_cpu(cq_desc->params.generic.addr_low)); - if (buf && cq_desc->datalen != 0) { - ice_debug(hw, mask, "Buffer:\n"); - if (buf_len < len) - len = buf_len; - - ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len); - } -} - /* FW Admin Queue command wrappers */ +/* Software lock/mutex that is meant to be held while the Global Config Lock + * in firmware is acquired by the software to prevent most (but not all) types + * of AQ commands from being sent to FW + */ +DEFINE_MUTEX(ice_global_cfg_lock_sw); + /** * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue * @hw: pointer to the HW struct @@ -1199,7 +1161,38 @@ enum ice_status ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd) { - return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); + struct ice_aqc_req_res *cmd = &desc->params.res_owner; + bool lock_acquired = false; + enum ice_status status; + + /* When a package download is in process (i.e. when the firmware's + * Global Configuration Lock resource is held), only the Download + * Package, Get Version, Get Package Info List and Release Resource + * (with resource ID set to Global Config Lock) AdminQ commands are + * allowed; all others must block until the package download completes + * and the Global Config Lock is released. See also + * ice_acquire_global_cfg_lock(). + */ + switch (le16_to_cpu(desc->opcode)) { + case ice_aqc_opc_download_pkg: + case ice_aqc_opc_get_pkg_info_list: + case ice_aqc_opc_get_ver: + break; + case ice_aqc_opc_release_res: + if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK) + break; + /* fall-through */ + default: + mutex_lock(&ice_global_cfg_lock_sw); + lock_acquired = true; + break; + } + + status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); + if (lock_acquired) + mutex_unlock(&ice_global_cfg_lock_sw); + + return status; } /** @@ -1237,6 +1230,43 @@ enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd) } /** + * ice_aq_send_driver_ver + * @hw: pointer to the HW struct + * @dv: driver's major, minor version + * @cd: pointer to command details structure or NULL + * + * Send the driver version (0x0002) to the firmware + */ +enum ice_status +ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv, + struct ice_sq_cd *cd) +{ + struct ice_aqc_driver_ver *cmd; + struct ice_aq_desc desc; + u16 len; + + cmd = &desc.params.driver_ver; + + if (!dv) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_ver); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->major_ver = dv->major_ver; + cmd->minor_ver = dv->minor_ver; + cmd->build_ver = dv->build_ver; + cmd->subbuild_ver = dv->subbuild_ver; + + len = 0; + while (len < sizeof(dv->driver_string) && + isascii(dv->driver_string[len]) && dv->driver_string[len]) + len++; + + return ice_aq_send_cmd(hw, &desc, dv->driver_string, len, cd); +} + +/** * ice_aq_q_shutdown * @hw: pointer to the HW struct * @unloading: is the driver unloading itself @@ -1254,7 +1284,7 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading) ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown); if (unloading) - cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING); + cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING; return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } @@ -1459,6 +1489,114 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) } /** + * ice_aq_alloc_free_res - command to allocate/free resources + * @hw: pointer to the HW struct + * @num_entries: number of resource entries in buffer + * @buf: Indirect buffer to hold data parameters and response + * @buf_size: size of buffer for indirect commands + * @opc: pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Helper function to allocate/free resources using the admin queue commands + */ +enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_alloc_free_res_cmd *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.sw_res_ctrl; + + if (!buf) + return ICE_ERR_PARAM; + + if (buf_size < (num_entries * sizeof(buf->elem[0]))) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_entries = cpu_to_le16(num_entries); + + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + +/** + * ice_alloc_hw_res - allocate resource + * @hw: pointer to the HW struct + * @type: type of resource + * @num: number of resources to allocate + * @btm: allocate from bottom + * @res: pointer to array that will receive the resources + */ +enum ice_status +ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res) +{ + struct ice_aqc_alloc_free_res_elem *buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(buf, elem, num - 1); + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Prepare buffer to allocate resource. */ + buf->num_elems = cpu_to_le16(num); + buf->res_type = cpu_to_le16(type | ICE_AQC_RES_TYPE_FLAG_DEDICATED | + ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX); + if (btm) + buf->res_type |= cpu_to_le16(ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM); + + status = ice_aq_alloc_free_res(hw, 1, buf, buf_len, + ice_aqc_opc_alloc_res, NULL); + if (status) + goto ice_alloc_res_exit; + + memcpy(res, buf->elem, sizeof(buf->elem) * num); + +ice_alloc_res_exit: + kfree(buf); + return status; +} + +/** + * ice_free_hw_res - free allocated HW resource + * @hw: pointer to the HW struct + * @type: type of resource to free + * @num: number of resources + * @res: pointer to array that contains the resources to free + */ +enum ice_status +ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res) +{ + struct ice_aqc_alloc_free_res_elem *buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(buf, elem, num - 1); + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Prepare buffer to free resource. */ + buf->num_elems = cpu_to_le16(num); + buf->res_type = cpu_to_le16(type); + memcpy(buf->elem, res, sizeof(buf->elem) * num); + + status = ice_aq_alloc_free_res(hw, num, buf, buf_len, + ice_aqc_opc_free_res, NULL); + if (status) + ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n"); + + kfree(buf); + return status; +} + +/** * ice_get_num_per_func - determine number of resources per PF * @hw: pointer to the HW structure * @max: value to be evenly split between each PF @@ -1529,29 +1667,33 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, case ICE_AQC_CAPS_VALID_FUNCTIONS: caps->valid_functions = number; ice_debug(hw, ICE_DBG_INIT, - "%s: valid functions = %d\n", prefix, + "%s: valid_functions (bitmap) = %d\n", prefix, caps->valid_functions); + + /* store func count for resource management purposes */ + if (dev_p) + dev_p->num_funcs = hweight32(number); break; case ICE_AQC_CAPS_SRIOV: caps->sr_iov_1_1 = (number == 1); ice_debug(hw, ICE_DBG_INIT, - "%s: SR-IOV = %d\n", prefix, + "%s: sr_iov_1_1 = %d\n", prefix, caps->sr_iov_1_1); break; case ICE_AQC_CAPS_VF: if (dev_p) { dev_p->num_vfs_exposed = number; ice_debug(hw, ICE_DBG_INIT, - "%s: VFs exposed = %d\n", prefix, + "%s: num_vfs_exposed = %d\n", prefix, dev_p->num_vfs_exposed); } else if (func_p) { func_p->num_allocd_vfs = number; func_p->vf_base_id = logical_id; ice_debug(hw, ICE_DBG_INIT, - "%s: VFs allocated = %d\n", prefix, + "%s: num_allocd_vfs = %d\n", prefix, func_p->num_allocd_vfs); ice_debug(hw, ICE_DBG_INIT, - "%s: VF base_id = %d\n", prefix, + "%s: vf_base_id = %d\n", prefix, func_p->vf_base_id); } break; @@ -1559,63 +1701,75 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, if (dev_p) { dev_p->num_vsi_allocd_to_host = number; ice_debug(hw, ICE_DBG_INIT, - "%s: num VSI alloc to host = %d\n", + "%s: num_vsi_allocd_to_host = %d\n", prefix, dev_p->num_vsi_allocd_to_host); } else if (func_p) { func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI); ice_debug(hw, ICE_DBG_INIT, - "%s: num guaranteed VSI (fw) = %d\n", + "%s: guar_num_vsi (fw) = %d\n", prefix, number); ice_debug(hw, ICE_DBG_INIT, - "%s: num guaranteed VSI = %d\n", + "%s: guar_num_vsi = %d\n", prefix, func_p->guar_num_vsi); } break; + case ICE_AQC_CAPS_DCB: + caps->dcb = (number == 1); + caps->active_tc_bitmap = logical_id; + caps->maxtc = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "%s: dcb = %d\n", prefix, caps->dcb); + ice_debug(hw, ICE_DBG_INIT, + "%s: active_tc_bitmap = %d\n", prefix, + caps->active_tc_bitmap); + ice_debug(hw, ICE_DBG_INIT, + "%s: maxtc = %d\n", prefix, caps->maxtc); + break; case ICE_AQC_CAPS_RSS: caps->rss_table_size = number; caps->rss_table_entry_width = logical_id; ice_debug(hw, ICE_DBG_INIT, - "%s: RSS table size = %d\n", prefix, + "%s: rss_table_size = %d\n", prefix, caps->rss_table_size); ice_debug(hw, ICE_DBG_INIT, - "%s: RSS table width = %d\n", prefix, + "%s: rss_table_entry_width = %d\n", prefix, caps->rss_table_entry_width); break; case ICE_AQC_CAPS_RXQS: caps->num_rxq = number; caps->rxq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "%s: num Rx queues = %d\n", prefix, + "%s: num_rxq = %d\n", prefix, caps->num_rxq); ice_debug(hw, ICE_DBG_INIT, - "%s: Rx first queue ID = %d\n", prefix, + "%s: rxq_first_id = %d\n", prefix, caps->rxq_first_id); break; case ICE_AQC_CAPS_TXQS: caps->num_txq = number; caps->txq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "%s: num Tx queues = %d\n", prefix, + "%s: num_txq = %d\n", prefix, caps->num_txq); ice_debug(hw, ICE_DBG_INIT, - "%s: Tx first queue ID = %d\n", prefix, + "%s: txq_first_id = %d\n", prefix, caps->txq_first_id); break; case ICE_AQC_CAPS_MSIX: caps->num_msix_vectors = number; caps->msix_vector_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "%s: MSIX vector count = %d\n", prefix, + "%s: num_msix_vectors = %d\n", prefix, caps->num_msix_vectors); ice_debug(hw, ICE_DBG_INIT, - "%s: MSIX first vector index = %d\n", prefix, + "%s: msix_vector_first_id = %d\n", prefix, caps->msix_vector_first_id); break; case ICE_AQC_CAPS_MAX_MTU: caps->max_mtu = number; - ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n", + ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n", prefix, caps->max_mtu); break; default: @@ -1625,6 +1779,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, break; } } + + /* Re-calculate capabilities that are dependent on the number of + * physical ports; i.e. some features are not supported or function + * differently on devices with more than 4 ports. + */ + if (hw->dev_caps.num_funcs > 4) { + /* Max 4 TCs per port */ + caps->maxtc = 4; + ice_debug(hw, ICE_DBG_INIT, + "%s: maxtc = %d (based on #ports)\n", prefix, + caps->maxtc); + } } /** @@ -1712,6 +1878,70 @@ ice_discover_caps(struct ice_hw *hw, enum ice_adminq_opc opc) } /** + * ice_set_safe_mode_caps - Override dev/func capabilities when in safe mode + * @hw: pointer to the hardware structure + */ +void ice_set_safe_mode_caps(struct ice_hw *hw) +{ + struct ice_hw_func_caps *func_caps = &hw->func_caps; + struct ice_hw_dev_caps *dev_caps = &hw->dev_caps; + u32 valid_func, rxq_first_id, txq_first_id; + u32 msix_vector_first_id, max_mtu; + u32 num_funcs; + + /* cache some func_caps values that should be restored after memset */ + valid_func = func_caps->common_cap.valid_functions; + txq_first_id = func_caps->common_cap.txq_first_id; + rxq_first_id = func_caps->common_cap.rxq_first_id; + msix_vector_first_id = func_caps->common_cap.msix_vector_first_id; + max_mtu = func_caps->common_cap.max_mtu; + + /* unset func capabilities */ + memset(func_caps, 0, sizeof(*func_caps)); + + /* restore cached values */ + func_caps->common_cap.valid_functions = valid_func; + func_caps->common_cap.txq_first_id = txq_first_id; + func_caps->common_cap.rxq_first_id = rxq_first_id; + func_caps->common_cap.msix_vector_first_id = msix_vector_first_id; + func_caps->common_cap.max_mtu = max_mtu; + + /* one Tx and one Rx queue in safe mode */ + func_caps->common_cap.num_rxq = 1; + func_caps->common_cap.num_txq = 1; + + /* two MSIX vectors, one for traffic and one for misc causes */ + func_caps->common_cap.num_msix_vectors = 2; + func_caps->guar_num_vsi = 1; + + /* cache some dev_caps values that should be restored after memset */ + valid_func = dev_caps->common_cap.valid_functions; + txq_first_id = dev_caps->common_cap.txq_first_id; + rxq_first_id = dev_caps->common_cap.rxq_first_id; + msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id; + max_mtu = dev_caps->common_cap.max_mtu; + num_funcs = dev_caps->num_funcs; + + /* unset dev capabilities */ + memset(dev_caps, 0, sizeof(*dev_caps)); + + /* restore cached values */ + dev_caps->common_cap.valid_functions = valid_func; + dev_caps->common_cap.txq_first_id = txq_first_id; + dev_caps->common_cap.rxq_first_id = rxq_first_id; + dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id; + dev_caps->common_cap.max_mtu = max_mtu; + dev_caps->num_funcs = num_funcs; + + /* one Tx and one Rx queue per function in safe mode */ + dev_caps->common_cap.num_rxq = num_funcs; + dev_caps->common_cap.num_txq = num_funcs; + + /* two MSIX vectors per function */ + dev_caps->common_cap.num_msix_vectors = 2 * num_funcs; +} + +/** * ice_get_caps - get info about the HW * @hw: pointer to the hardware structure */ @@ -1993,6 +2223,17 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, desc.params.set_phy.lport_num = lport; desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n", + (unsigned long long)le64_to_cpu(cfg->phy_type_low)); + ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n", + (unsigned long long)le64_to_cpu(cfg->phy_type_high)); + ice_debug(hw, ICE_DBG_LINK, "caps = 0x%x\n", cfg->caps); + ice_debug(hw, ICE_DBG_LINK, "low_power_ctrl = 0x%x\n", + cfg->low_power_ctrl); + ice_debug(hw, ICE_DBG_LINK, "eee_cap = 0x%x\n", cfg->eee_cap); + ice_debug(hw, ICE_DBG_LINK, "eeer_value = 0x%x\n", cfg->eeer_value); + ice_debug(hw, ICE_DBG_LINK, "link_fec_opt = 0x%x\n", cfg->link_fec_opt); + return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd); } @@ -2024,7 +2265,7 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi) if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); if (!status) memcpy(li->module_type, &pcaps->module_type, @@ -2174,27 +2415,24 @@ ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) { switch (fec) { case ICE_FEC_BASER: - /* Clear auto FEC and RS bits, and AND BASE-R ability + /* Clear RS bits, and AND BASE-R ability * bits and OR request bits. */ - cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN | ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN; cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ | ICE_AQC_PHY_FEC_25G_KR_REQ; break; case ICE_FEC_RS: - /* Clear auto FEC and BASE-R bits, and AND RS ability + /* Clear BASE-R bits, and AND RS ability * bits and OR request bits. */ - cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN; cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ | ICE_AQC_PHY_FEC_25G_RS_544_REQ; break; case ICE_FEC_NONE: - /* Clear auto FEC and all FEC option bits. */ - cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + /* Clear all FEC option bits. */ cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK; break; case ICE_FEC_AUTO: @@ -2344,6 +2582,52 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, } /** + * ice_aq_sff_eeprom + * @hw: pointer to the HW struct + * @lport: bits [7:0] = logical port, bit [8] = logical port valid + * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default) + * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding. + * @page: QSFP page + * @set_page: set or ignore the page + * @data: pointer to data buffer to be read/written to the I2C device. + * @length: 1-16 for read, 1 for write. + * @write: 0 read, 1 for write. + * @cd: pointer to command details structure or NULL + * + * Read/Write SFF EEPROM (0x06EE) + */ +enum ice_status +ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, + bool write, struct ice_sq_cd *cd) +{ + struct ice_aqc_sff_eeprom *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (!data || (mem_addr & 0xff00)) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); + cmd = &desc.params.read_write_sff_param; + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + cmd->lport_num = (u8)(lport & 0xff); + cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); + cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & + ICE_AQC_SFF_I2CBUS_7BIT_M) | + ((set_page << + ICE_AQC_SFF_SET_EEPROM_PAGE_S) & + ICE_AQC_SFF_SET_EEPROM_PAGE_M)); + cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff); + cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S); + if (write) + cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE); + + status = ice_aq_send_cmd(hw, &desc, data, length, cd); + return status; +} + +/** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure * @vsi_id: VSI FW index @@ -2936,7 +3220,7 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) * @tc: TC number * @q_handle: software queue handle */ -static struct ice_q_ctx * +struct ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle) { struct ice_vsi_ctx *vsi; @@ -3033,9 +3317,12 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, node.node_teid = buf->txqs[0].q_teid; node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF; q_ctx->q_handle = q_handle; + q_ctx->q_teid = le32_to_cpu(node.node_teid); - /* add a leaf node into schduler tree queue layer */ + /* add a leaf node into scheduler tree queue layer */ status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + if (!status) + status = ice_sched_replay_q_bw(pi, q_ctx); ena_txq_exit: mutex_unlock(&pi->sched_lock); @@ -3219,7 +3506,10 @@ enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle) if (status) return status; } - + /* Replay per VSI all RSS configurations */ + status = ice_replay_rss_cfg(hw, vsi_handle); + if (status) + return status; /* Replay per VSI all filters */ status = ice_replay_vsi_all_fltr(hw, vsi_handle); return status; @@ -3240,40 +3530,44 @@ void ice_replay_post(struct ice_hw *hw) /** * ice_stat_update40 - read 40 bit stat from the chip and update stat values * @hw: ptr to the hardware info - * @hireg: high 32 bit HW register to read from - * @loreg: low 32 bit HW register to read from + * @reg: offset of 64 bit HW register to read from * @prev_stat_loaded: bool to specify if previous stats are loaded * @prev_stat: ptr to previous loaded stat value * @cur_stat: ptr to current stat value */ void -ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, - bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat) +ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat) { - u64 new_data; - - new_data = rd32(hw, loreg); - new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; + u64 new_data = rd64(hw, reg) & (BIT_ULL(40) - 1); /* device stats are not reset at PFR, they likely will not be zeroed - * when the driver starts. So save the first values read and use them as - * offsets to be subtracted from the raw values in order to report stats - * that count from zero. + * when the driver starts. Thus, save the value from the first read + * without adding to the statistic value so that we report stats which + * count up from zero. */ - if (!prev_stat_loaded) + if (!prev_stat_loaded) { *prev_stat = new_data; + return; + } + + /* Calculate the difference between the new and old values, and then + * add it to the software stat value. + */ if (new_data >= *prev_stat) - *cur_stat = new_data - *prev_stat; + *cur_stat += new_data - *prev_stat; else /* to manage the potential roll-over */ - *cur_stat = (new_data + BIT_ULL(40)) - *prev_stat; - *cur_stat &= 0xFFFFFFFFFFULL; + *cur_stat += (new_data + BIT_ULL(40)) - *prev_stat; + + /* Update the previously stored value to prepare for next read */ + *prev_stat = new_data; } /** * ice_stat_update32 - read 32 bit stat from the chip and update stat values * @hw: ptr to the hardware info - * @reg: HW register to read from + * @reg: offset of HW register to read from * @prev_stat_loaded: bool to specify if previous stats are loaded * @prev_stat: ptr to previous loaded stat value * @cur_stat: ptr to current stat value @@ -3287,17 +3581,26 @@ ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, new_data = rd32(hw, reg); /* device stats are not reset at PFR, they likely will not be zeroed - * when the driver starts. So save the first values read and use them as - * offsets to be subtracted from the raw values in order to report stats - * that count from zero. + * when the driver starts. Thus, save the value from the first read + * without adding to the statistic value so that we report stats which + * count up from zero. */ - if (!prev_stat_loaded) + if (!prev_stat_loaded) { *prev_stat = new_data; + return; + } + + /* Calculate the difference between the new and old values, and then + * add it to the software stat value. + */ if (new_data >= *prev_stat) - *cur_stat = new_data - *prev_stat; + *cur_stat += new_data - *prev_stat; else /* to manage the potential roll-over */ - *cur_stat = (new_data + BIT_ULL(32)) - *prev_stat; + *cur_stat += (new_data + BIT_ULL(32)) - *prev_stat; + + /* Update the previously stored value to prepare for next read */ + *prev_stat = new_data; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index d1f8353fe6bb..f9fc005d35a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -6,19 +6,24 @@ #include "ice.h" #include "ice_type.h" +#include "ice_nvm.h" +#include "ice_flex_pipe.h" #include "ice_switch.h" #include <linux/avf/virtchnl.h> enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw); -void -ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type); enum ice_status ice_check_reset(struct ice_hw *hw); enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); +enum ice_status ice_create_all_ctrlq(struct ice_hw *hw); enum ice_status ice_init_all_ctrlq(struct ice_hw *hw); void ice_shutdown_all_ctrlq(struct ice_hw *hw); +void ice_destroy_all_ctrlq(struct ice_hw *hw); enum ice_status ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending); @@ -29,17 +34,25 @@ enum ice_status ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access, u32 timeout); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); +enum ice_status +ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res); +enum ice_status +ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res); enum ice_status ice_init_nvm(struct ice_hw *hw); enum ice_status ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data); enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd); +enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); void ice_clear_pxe_mode(struct ice_hw *hw); enum ice_status ice_get_caps(struct ice_hw *hw); -void ice_dev_onetime_setup(struct ice_hw *hw); +void ice_set_safe_mode_caps(struct ice_hw *hw); enum ice_status ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, @@ -64,12 +77,18 @@ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); extern const struct ice_ctx_ele ice_tlan_ctx_info[]; enum ice_status ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info); + +extern struct mutex ice_global_cfg_lock_sw; + enum ice_status ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); enum ice_status +ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv, + struct ice_sq_cd *cd); +enum ice_status ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, struct ice_aqc_get_phy_caps_data *caps, struct ice_sq_cd *cd); @@ -106,6 +125,10 @@ ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd); enum ice_status ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); +enum ice_status +ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, + bool write, struct ice_sq_cd *cd); enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues, @@ -122,12 +145,17 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf); +struct ice_q_ctx * +ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); void -ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, - bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); +ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat); void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); +void +ice_get_nvm_version(struct ice_hw *hw, u8 *oem_ver, u16 *oem_build, + u8 *oem_patch, u8 *ver_hi, u8 *ver_lo); enum ice_status ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_get_elem *buf); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index e91ac4df0242..dd946866d7b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -310,7 +310,7 @@ ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) * @cq: pointer to the specific Control queue * * This is the main initialization routine for the Control Send Queue - * Prior to calling this function, drivers *MUST* set the following fields + * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_sq_entries * - cq->sq_buf_size @@ -369,7 +369,7 @@ init_ctrlq_exit: * @cq: pointer to the specific Control queue * * The main initialization routine for the Admin Receive (Event) Queue. - * Prior to calling this function, drivers *MUST* set the following fields + * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_rq_entries * - cq->rq_buf_size @@ -569,14 +569,8 @@ static enum ice_status ice_init_check_adminq(struct ice_hw *hw) return 0; init_ctrlq_free_rq: - if (cq->rq.count) { - ice_shutdown_rq(hw, cq); - mutex_destroy(&cq->rq_lock); - } - if (cq->sq.count) { - ice_shutdown_sq(hw, cq); - mutex_destroy(&cq->sq_lock); - } + ice_shutdown_rq(hw, cq); + ice_shutdown_sq(hw, cq); return status; } @@ -585,12 +579,14 @@ init_ctrlq_free_rq: * @hw: pointer to the hardware structure * @q_type: specific Control queue type * - * Prior to calling this function, drivers *MUST* set the following fields + * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_sq_entries * - cq->num_rq_entries * - cq->rq_buf_size * - cq->sq_buf_size + * + * NOTE: this function does not initialize the controlq locks */ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) { @@ -616,8 +612,6 @@ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) !cq->rq_buf_size || !cq->sq_buf_size) { return ICE_ERR_CFG; } - mutex_init(&cq->sq_lock); - mutex_init(&cq->rq_lock); /* setup SQ command write back timeout */ cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT; @@ -625,7 +619,7 @@ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) /* allocate the ATQ */ ret_code = ice_init_sq(hw, cq); if (ret_code) - goto init_ctrlq_destroy_locks; + return ret_code; /* allocate the ARQ */ ret_code = ice_init_rq(hw, cq); @@ -637,9 +631,6 @@ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) init_ctrlq_free_sq: ice_shutdown_sq(hw, cq); -init_ctrlq_destroy_locks: - mutex_destroy(&cq->sq_lock); - mutex_destroy(&cq->rq_lock); return ret_code; } @@ -647,12 +638,14 @@ init_ctrlq_destroy_locks: * ice_init_all_ctrlq - main initialization routine for all control queues * @hw: pointer to the hardware structure * - * Prior to calling this function, drivers *MUST* set the following fields + * Prior to calling this function, the driver MUST* set the following fields * in the cq->structure for all control queues: * - cq->num_sq_entries * - cq->num_rq_entries * - cq->rq_buf_size * - cq->sq_buf_size + * + * NOTE: this function does not initialize the controlq locks. */ enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) { @@ -672,9 +665,47 @@ enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) } /** + * ice_init_ctrlq_locks - Initialize locks for a control queue + * @cq: pointer to the control queue + * + * Initializes the send and receive queue locks for a given control queue. + */ +static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq) +{ + mutex_init(&cq->sq_lock); + mutex_init(&cq->rq_lock); +} + +/** + * ice_create_all_ctrlq - main initialization routine for all control queues + * @hw: pointer to the hardware structure + * + * Prior to calling this function, the driver *MUST* set the following fields + * in the cq->structure for all control queues: + * - cq->num_sq_entries + * - cq->num_rq_entries + * - cq->rq_buf_size + * - cq->sq_buf_size + * + * This function creates all the control queue locks and then calls + * ice_init_all_ctrlq. It should be called once during driver load. If the + * driver needs to re-initialize control queues at run time it should call + * ice_init_all_ctrlq instead. + */ +enum ice_status ice_create_all_ctrlq(struct ice_hw *hw) +{ + ice_init_ctrlq_locks(&hw->adminq); + ice_init_ctrlq_locks(&hw->mailboxq); + + return ice_init_all_ctrlq(hw); +} + +/** * ice_shutdown_ctrlq - shutdown routine for any control queue * @hw: pointer to the hardware structure * @q_type: specific Control queue type + * + * NOTE: this function does not destroy the control queue locks. */ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) { @@ -693,19 +724,17 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) return; } - if (cq->sq.count) { - ice_shutdown_sq(hw, cq); - mutex_destroy(&cq->sq_lock); - } - if (cq->rq.count) { - ice_shutdown_rq(hw, cq); - mutex_destroy(&cq->rq_lock); - } + ice_shutdown_sq(hw, cq); + ice_shutdown_rq(hw, cq); } /** * ice_shutdown_all_ctrlq - shutdown routine for all control queues * @hw: pointer to the hardware structure + * + * NOTE: this function does not destroy the control queue locks. The driver + * may call this at runtime to shutdown and later restart control queues, such + * as in response to a reset event. */ void ice_shutdown_all_ctrlq(struct ice_hw *hw) { @@ -716,6 +745,37 @@ void ice_shutdown_all_ctrlq(struct ice_hw *hw) } /** + * ice_destroy_ctrlq_locks - Destroy locks for a control queue + * @cq: pointer to the control queue + * + * Destroys the send and receive queue locks for a given control queue. + */ +static void +ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq) +{ + mutex_destroy(&cq->sq_lock); + mutex_destroy(&cq->rq_lock); +} + +/** + * ice_destroy_all_ctrlq - exit routine for all control queues + * @hw: pointer to the hardware structure + * + * This function shuts down all the control queues and then destroys the + * control queue locks. It should be called once during driver unload. The + * driver should call ice_shutdown_all_ctrlq if it needs to shut down and + * reinitialize control queues, such as in response to a reset event. + */ +void ice_destroy_all_ctrlq(struct ice_hw *hw) +{ + /* shut down all the control queues first */ + ice_shutdown_all_ctrlq(hw); + + ice_destroy_ctrlq_locks(&hw->adminq); + ice_destroy_ctrlq_locks(&hw->mailboxq); +} + +/** * ice_clean_sq - cleans Admin send queue (ATQ) * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue @@ -750,6 +810,52 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) } /** + * ice_debug_cq + * @hw: pointer to the hardware structure + * @desc: pointer to control queue descriptor + * @buf: pointer to command buffer + * @buf_len: max length of buf + * + * Dumps debug log about control command with descriptor contents. + */ +static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) +{ + struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; + u16 len; + + if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) && + !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask)) + return; + + if (!desc) + return; + + len = le16_to_cpu(cq_desc->datalen); + + ice_debug(hw, ICE_DBG_AQ_DESC, + "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", + le16_to_cpu(cq_desc->opcode), + le16_to_cpu(cq_desc->flags), + le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->cookie_high), + le32_to_cpu(cq_desc->cookie_low)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.param0), + le32_to_cpu(cq_desc->params.generic.param1)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.addr_high), + le32_to_cpu(cq_desc->params.generic.addr_low)); + if (buf && cq_desc->datalen != 0) { + ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n"); + if (buf_len < len) + len = buf_len; + + ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len); + } +} + +/** * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ) * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue @@ -874,10 +980,10 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, } /* Debug desc and buffer */ - ice_debug(hw, ICE_DBG_AQ_MSG, + ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size); + ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size); (cq->sq.next_to_use)++; if (cq->sq.next_to_use == cq->sq.count) @@ -888,7 +994,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (ice_sq_done(hw, cq)) break; - mdelay(1); + udelay(ICE_CTL_Q_SQ_CMD_USEC); total_delay++; } while (total_delay < cq->sq_cmd_timeout); @@ -911,7 +1017,8 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, retval = le16_to_cpu(desc->retval); if (retval) { ice_debug(hw, ICE_DBG_AQ_MSG, - "Control Send Queue command completed with error 0x%x\n", + "Control Send Queue command 0x%04X completed with error 0x%X\n", + le16_to_cpu(desc->opcode), retval); /* strip off FW internal code */ @@ -926,7 +1033,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size); + ice_debug_cq(hw, (void *)desc, buf, buf_size); /* save writeback AQ if requested */ if (details->wb_desc) @@ -1015,7 +1122,8 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, - "Control Receive Queue Event received with error 0x%x\n", + "Control Receive Queue Event 0x%04X received with error 0x%X\n", + le16_to_cpu(desc->opcode), cq->rq_last_status); } memcpy(&e->desc, desc, sizeof(e->desc)); @@ -1024,10 +1132,9 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (e->msg_buf && e->msg_len) memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len); - ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n"); + ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf, - cq->rq_buf_size); + ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size); /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message size diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 44945c2165d8..bf0ebe6149e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -22,7 +22,7 @@ */ #define EXP_FW_API_VER_BRANCH 0x00 #define EXP_FW_API_VER_MAJOR 0x01 -#define EXP_FW_API_VER_MINOR 0x03 +#define EXP_FW_API_VER_MINOR 0x05 /* Different control queue types: These are mainly for SW consumption. */ enum ice_ctl_q { @@ -31,8 +31,9 @@ enum ice_ctl_q { ICE_CTL_Q_MAILBOX, }; -/* Control Queue default settings */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */ +/* Control Queue timeout settings - max delay 250ms */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ +#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ struct ice_ctl_q_ring { void *dma_head; /* Virtual address to DMA head */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index c2002ded65f6..adb8dab765c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -60,7 +60,7 @@ ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf, * Enable or Disable posting of an event on ARQ when LLDP MIB * associated with the interface changes (0x0A01) */ -enum ice_status +static enum ice_status ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, struct ice_sq_cd *cd) { @@ -444,9 +444,15 @@ ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv, * |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7| * --------------------------------- */ - ice_for_each_traffic_class(i) + ice_for_each_traffic_class(i) { etscfg->tcbwtable[i] = buf[offset++]; + if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT) + dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT; + else + dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS; + } + /* Number of TCs supported (1 octet) */ etscfg->maxtcs = buf[offset]; } @@ -937,10 +943,11 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi) /** * ice_init_dcb * @hw: pointer to the HW struct + * @enable_mib_change: enable MIB change event * * Update DCB configuration from the Firmware */ -enum ice_status ice_init_dcb(struct ice_hw *hw) +enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) { struct ice_port_info *pi = hw->port_info; enum ice_status ret = 0; @@ -954,20 +961,51 @@ enum ice_status ice_init_dcb(struct ice_hw *hw) pi->dcbx_status = ice_get_dcbx_status(hw); if (pi->dcbx_status == ICE_DCBX_STATUS_DONE || - pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS) { + pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS || + pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { /* Get current DCBX configuration */ ret = ice_get_dcb_cfg(pi); - pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM); if (ret) return ret; + pi->is_sw_lldp = false; } else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) { return ICE_ERR_NOT_READY; } /* Configure the LLDP MIB change event */ - ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL); + if (enable_mib_change) { + ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL); + if (ret) + pi->is_sw_lldp = true; + } + + return ret; +} + +/** + * ice_cfg_lldp_mib_change + * @hw: pointer to the HW struct + * @ena_mib: enable/disable MIB change event + * + * Configure (disable/enable) MIB + */ +enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib) +{ + struct ice_port_info *pi = hw->port_info; + enum ice_status ret; + + if (!hw->func_caps.common_cap.dcb) + return ICE_ERR_NOT_SUPPORTED; + + /* Get DCBX status */ + pi->dcbx_status = ice_get_dcbx_status(hw); + + if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) + return ICE_ERR_NOT_READY; + + ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL); if (!ret) - pi->is_sw_lldp = false; + pi->is_sw_lldp = !ena_mib; return ret; } @@ -1285,13 +1323,13 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi) } /** - * ice_aq_query_port_ets - query port ets configuration + * ice_aq_query_port_ets - query port ETS configuration * @pi: port information structure * @buf: pointer to buffer * @buf_size: buffer size in bytes * @cd: pointer to command details structure or NULL * - * query current port ets configuration + * query current port ETS configuration */ static enum ice_status ice_aq_query_port_ets(struct ice_port_info *pi, @@ -1378,13 +1416,13 @@ ice_update_port_tc_tree_cfg(struct ice_port_info *pi, } /** - * ice_query_port_ets - query port ets configuration + * ice_query_port_ets - query port ETS configuration * @pi: port information structure * @buf: pointer to buffer * @buf_size: buffer size in bytes * @cd: pointer to command details structure or NULL * - * query current port ets configuration and update the + * query current port ETS configuration and update the * SW DB with the TC changes */ enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h index 522e1452abe2..ee138f9bdc7c 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb.h @@ -125,7 +125,7 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, struct ice_dcbx_cfg *dcbcfg); enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi); enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi); -enum ice_status ice_init_dcb(struct ice_hw *hw); +enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change); enum ice_status ice_query_port_ets(struct ice_port_info *pi, struct ice_aqc_port_ets_elem *buf, u16 buf_size, @@ -139,9 +139,7 @@ ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd); enum ice_status ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent, bool *dcbx_agent_status, struct ice_sq_cd *cd); -enum ice_status -ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, - struct ice_sq_cd *cd); +enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib); #else /* CONFIG_DCB */ static inline enum ice_status ice_aq_stop_lldp(struct ice_hw __always_unused *hw, @@ -172,9 +170,8 @@ ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw, } static inline enum ice_status -ice_aq_cfg_lldp_mib_change(struct ice_hw __always_unused *hw, - bool __always_unused ena_update, - struct ice_sq_cd __always_unused *cd) +ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw, + bool __always_unused ena_mib) { return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index fe88b127ca42..7108fb41b604 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -2,6 +2,49 @@ /* Copyright (c) 2019, Intel Corporation. */ #include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" + +/** + * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration + * @vsi: the VSI being configured + * @ena_tc: TC map to be enabled + */ +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf = vsi->back; + struct ice_dcbx_cfg *dcbcfg; + u8 netdev_tc; + int i; + + if (!netdev) + return; + + if (!ena_tc) { + netdev_reset_tc(netdev); + return; + } + + if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) + return; + + dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + + ice_for_each_traffic_class(i) + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + vsi->tc_cfg.tc_info[i].qcount_tx, + vsi->tc_cfg.tc_info[i].qoffset); + + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + u8 ets_tc = dcbcfg->etscfg.prio_table[i]; + + /* Get the mapped netdev TC# for the UP */ + netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; + netdev_set_prio_tc_map(netdev, i, netdev_tc); + } +} /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs @@ -58,6 +101,16 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) } /** + * ice_dcb_get_tc - Get the TC associated with the queue + * @vsi: ptr to the VSI + * @queue_index: queue number associated with VSI + */ +u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index) +{ + return vsi->tx_rings[queue_index]->dcb_tc; +} + +/** * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC * @vsi: VSI owner of rings being updated */ @@ -96,83 +149,62 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) } /** - * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs - * @pf: pointer to the PF struct - * - * Assumed caller has already disabled all VSIs before - * calling this function. Reconfiguring DCB based on - * local_dcbx_cfg. - */ -static void ice_pf_dcb_recfg(struct ice_pf *pf) -{ - struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; - u8 tc_map = 0; - int v, ret; - - /* Update each VSI */ - ice_for_each_vsi(pf, v) { - if (!pf->vsi[v]) - continue; - - if (pf->vsi[v]->type == ICE_VSI_PF) - tc_map = ice_dcb_get_ena_tc(dcbcfg); - else - tc_map = ICE_DFLT_TRAFFIC_CLASS; - - ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map); - if (ret) { - dev_err(&pf->pdev->dev, - "Failed to config TC for VSI index: %d\n", - pf->vsi[v]->idx); - continue; - } - - ice_vsi_map_rings_to_vectors(pf->vsi[v]); - } -} - -/** * ice_pf_dcb_cfg - Apply new DCB configuration * @pf: pointer to the PF struct * @new_cfg: DCBX config to apply * @locked: is the RTNL held */ -static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) { - struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; - int ret = 0; + struct ice_dcbx_cfg *old_cfg, *curr_cfg; + struct device *dev = ice_pf_to_dev(pf); + int ret = ICE_DCB_NO_HW_CHG; + struct ice_vsi *pf_vsi; curr_cfg = &pf->hw.port_info->local_dcbx_cfg; + /* FW does not care if change happened */ + if (!pf->hw.port_info->is_sw_lldp) + ret = ICE_DCB_HW_CHG_RST; + /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(new_cfg) > 1) { - dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { - dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) { - dev_dbg(&pf->pdev->dev, "No change in DCB config required\n"); + dev_dbg(dev, "No change in DCB config required\n"); return ret; } /* Store old config in case FW config fails */ - old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL); - memcpy(old_cfg, curr_cfg, sizeof(*old_cfg)); + old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) + return -ENOMEM; + + dev_info(dev, "Commit DCB Configuration to the hardware\n"); + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_dbg(dev, "PF VSI doesn't exist\n"); + ret = -EINVAL; + goto free_cfg; + } /* avoid race conditions by holding the lock while disabling and * re-enabling the VSI */ if (!locked) rtnl_lock(); - ice_pf_dis_all_vsi(pf, true); + ice_dis_vsi(pf_vsi, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); + memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); /* Only send new config to HW if we are in SW LLDP mode. Otherwise, * the new config came from the HW in the first place. @@ -180,7 +212,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) if (pf->hw.port_info->is_sw_lldp) { ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Set DCB Config failed\n"); + dev_err(dev, "Set DCB Config failed\n"); /* Restore previous settings to local config */ memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg)); goto out; @@ -189,34 +221,108 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto out; } ice_pf_dcb_recfg(pf); out: - ice_pf_ena_all_vsi(pf, true); + ice_ena_vsi(pf_vsi, true); if (!locked) rtnl_unlock(); - devm_kfree(&pf->pdev->dev, old_cfg); +free_cfg: + kfree(old_cfg); return ret; } /** + * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config + * @pi: port information structure + */ +static void ice_cfg_etsrec_defaults(struct ice_port_info *pi) +{ + struct ice_dcbx_cfg *dcbcfg = &pi->local_dcbx_cfg; + u8 i; + + /* Ensure ETS recommended DCB configuration is not already set */ + if (dcbcfg->etsrec.maxtcs) + return; + + /* In CEE mode, set the default to 1 TC */ + dcbcfg->etsrec.maxtcs = 1; + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100; + dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT : + ICE_IEEE_TSA_ETS; + } +} + +/** + * ice_dcb_need_recfg - Check if DCB needs reconfig + * @pf: board private structure + * @old_cfg: current DCB config + * @new_cfg: new DCB config + */ +static bool +ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) +{ + struct device *dev = ice_pf_to_dev(pf); + bool need_reconfig = false; + + /* Check if ETS configuration has changed */ + if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, + sizeof(new_cfg->etscfg))) { + /* If Priority Table has changed reconfig is needed */ + if (memcmp(&new_cfg->etscfg.prio_table, + &old_cfg->etscfg.prio_table, + sizeof(new_cfg->etscfg.prio_table))) { + need_reconfig = true; + dev_dbg(dev, "ETS UP2TC changed.\n"); + } + + if (memcmp(&new_cfg->etscfg.tcbwtable, + &old_cfg->etscfg.tcbwtable, + sizeof(new_cfg->etscfg.tcbwtable))) + dev_dbg(dev, "ETS TC BW Table changed.\n"); + + if (memcmp(&new_cfg->etscfg.tsatable, + &old_cfg->etscfg.tsatable, + sizeof(new_cfg->etscfg.tsatable))) + dev_dbg(dev, "ETS TSA Table changed.\n"); + } + + /* Check if PFC configuration has changed */ + if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { + need_reconfig = true; + dev_dbg(dev, "PFC config change detected.\n"); + } + + /* Check if APP Table has changed */ + if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { + need_reconfig = true; + dev_dbg(dev, "APP Table change detected.\n"); + } + + dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig); + return need_reconfig; +} + +/** * ice_dcb_rebuild - rebuild DCB post reset * @pf: physical function instance */ void ice_dcb_rebuild(struct ice_pf *pf) { struct ice_aqc_port_ets_elem buf = { 0 }; - struct ice_dcbx_cfg *prev_cfg; + struct device *dev = ice_pf_to_dev(pf); + struct ice_dcbx_cfg *err_cfg; enum ice_status ret; - u8 willing; ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } @@ -224,61 +330,58 @@ void ice_dcb_rebuild(struct ice_pf *pf) if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags)) return; - /* Save current willing state and force FW to unwilling */ - willing = pf->hw.port_info->local_dcbx_cfg.etscfg.willing; - pf->hw.port_info->local_dcbx_cfg.etscfg.willing = 0x0; + mutex_lock(&pf->tc_mutex); + + if (!pf->hw.port_info->is_sw_lldp) + ice_cfg_etsrec_defaults(pf->hw.port_info); + ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n"); - goto dcb_error; - } - - /* Retrieve DCB config and ensure same as current in SW */ - prev_cfg = devm_kmemdup(&pf->pdev->dev, - &pf->hw.port_info->local_dcbx_cfg, - sizeof(*prev_cfg), GFP_KERNEL); - if (!prev_cfg) { - dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n"); + dev_err(dev, "Failed to set DCB config in rebuild\n"); goto dcb_error; } - ice_init_dcb(&pf->hw); - if (memcmp(prev_cfg, &pf->hw.port_info->local_dcbx_cfg, - sizeof(*prev_cfg))) { - /* difference in cfg detected - disable DCB till next MIB */ - dev_err(&pf->pdev->dev, "Set local MIB not accurate\n"); - devm_kfree(&pf->pdev->dev, prev_cfg); - goto dcb_error; + if (!pf->hw.port_info->is_sw_lldp) { + ret = ice_cfg_lldp_mib_change(&pf->hw, true); + if (ret && !pf->hw.port_info->is_sw_lldp) { + dev_err(dev, "Failed to register for MIB changes\n"); + goto dcb_error; + } } - /* fetched config congruent to previous configuration */ - devm_kfree(&pf->pdev->dev, prev_cfg); - - /* Configuration replayed - reset willing state to previous */ - pf->hw.port_info->local_dcbx_cfg.etscfg.willing = willing; - ret = ice_set_dcb_cfg(pf->hw.port_info); - if (ret) { - dev_err(&pf->pdev->dev, "Fail restoring prev willing state\n"); - goto dcb_error; - } - dev_info(&pf->pdev->dev, "DCB restored after reset\n"); + dev_info(dev, "DCB restored after reset\n"); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } + mutex_unlock(&pf->tc_mutex); + return; dcb_error: - dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n"); - prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL); - prev_cfg->etscfg.willing = true; - prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; - prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; - memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); - ice_pf_dcb_cfg(pf, prev_cfg, false); - devm_kfree(&pf->pdev->dev, prev_cfg); + dev_err(dev, "Disabling DCB until new settings occur\n"); + err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL); + if (!err_cfg) { + mutex_unlock(&pf->tc_mutex); + return; + } + + err_cfg->etscfg.willing = true; + err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; + err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; + memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec)); + /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked + * here as is done for other calls to that function. That check is + * not necessary since this is in this function's error cleanup path. + * Suppress the Coverity warning with the following comment... + */ + /* coverity[check_return] */ + ice_pf_dcb_cfg(pf, err_cfg, false); + kfree(err_cfg); + + mutex_unlock(&pf->tc_mutex); } /** @@ -293,28 +396,28 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) int ret = 0; pi = pf->hw.port_info; - newcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*newcfg), GFP_KERNEL); + newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL); if (!newcfg) return -ENOMEM; - memcpy(newcfg, &pi->local_dcbx_cfg, sizeof(*newcfg)); memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg)); - dev_info(&pf->pdev->dev, "Configuring initial DCB values\n"); + dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n"); if (ice_pf_dcb_cfg(pf, newcfg, locked)) ret = -EINVAL; - devm_kfree(&pf->pdev->dev, newcfg); + kfree(newcfg); return ret; } /** - * ice_dcb_sw_default_config - Apply a default DCB config + * ice_dcb_sw_dflt_cfg - Apply a default DCB config * @pf: PF to apply config to + * @ets_willing: configure ETS willing * @locked: was this function called with RTNL held */ -static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) +static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) { struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *dcbcfg; @@ -324,13 +427,14 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) hw = &pf->hw; pi = hw->port_info; - dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL); + dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL); + if (!dcbcfg) + return -ENOMEM; - memset(dcbcfg, 0, sizeof(*dcbcfg)); memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg)); - dcbcfg->etscfg.willing = 1; - dcbcfg->etscfg.maxtcs = 8; + dcbcfg->etscfg.willing = ets_willing ? 1 : 0; + dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc; dcbcfg->etscfg.tcbwtable[0] = 100; dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; @@ -339,7 +443,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) dcbcfg->etsrec.willing = 0; dcbcfg->pfc.willing = 1; - dcbcfg->pfc.pfccap = IEEE_8021QAZ_MAX_TCS; + dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc; dcbcfg->numapps = 1; dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE; @@ -347,7 +451,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); - devm_kfree(&pf->pdev->dev, dcbcfg); + kfree(dcbcfg); if (ret) return ret; @@ -355,68 +459,165 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) } /** + * ice_dcb_tc_contig - Check that TCs are contiguous + * @prio_table: pointer to priority table + * + * Check if TCs begin with TC0 and are contiguous + */ +static bool ice_dcb_tc_contig(u8 *prio_table) +{ + u8 max_tc = 0; + int i; + + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) { + u8 cur_tc = prio_table[i]; + + if (cur_tc > max_tc) + return false; + else if (cur_tc == max_tc) + max_tc++; + } + + return true; +} + +/** + * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs + * @pf: pointer to the PF struct + * + * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing + */ +static int ice_dcb_noncontig_cfg(struct ice_pf *pf) +{ + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + struct device *dev = ice_pf_to_dev(pf); + int ret; + + /* Configure SW DCB default with ETS non-willing */ + ret = ice_dcb_sw_dflt_cfg(pf, false, true); + if (ret) { + dev_err(dev, "Failed to set local DCB config %d\n", ret); + return ret; + } + + /* Reconfigure with ETS willing so that FW will send LLDP MIB event */ + dcbcfg->etscfg.willing = 1; + ret = ice_set_dcb_cfg(pf->hw.port_info); + if (ret) + dev_err(dev, "Failed to set DCB to unwilling\n"); + + return ret; +} + +/** + * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs + * @pf: pointer to the PF struct + * + * Assumed caller has already disabled all VSIs before + * calling this function. Reconfiguring DCB based on + * local_dcbx_cfg. + */ +void ice_pf_dcb_recfg(struct ice_pf *pf) +{ + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + u8 tc_map = 0; + int v, ret; + + /* Update each VSI */ + ice_for_each_vsi(pf, v) { + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi) + continue; + + if (vsi->type == ICE_VSI_PF) { + tc_map = ice_dcb_get_ena_tc(dcbcfg); + + /* If DCBX request non-contiguous TC, then configure + * default TC + */ + if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) { + tc_map = ICE_DFLT_TRAFFIC_CLASS; + ice_dcb_noncontig_cfg(pf); + } + } else { + tc_map = ICE_DFLT_TRAFFIC_CLASS; + } + + ret = ice_vsi_cfg_tc(vsi, tc_map); + if (ret) { + dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n", + vsi->idx); + continue; + } + + ice_vsi_map_rings_to_vectors(vsi); + if (vsi->type == ICE_VSI_PF) + ice_dcbnl_set_all(vsi); + } +} + +/** * ice_init_pf_dcb - initialize DCB for a PF * @pf: PF to initialize DCB for * @locked: Was function called with RTNL held */ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_port_info *port_info; struct ice_hw *hw = &pf->hw; - int sw_default = 0; int err; port_info = hw->port_info; - err = ice_init_dcb(hw); - if (err) { - /* FW LLDP is not active, default to SW DCBX/LLDP */ - dev_info(&pf->pdev->dev, "FW LLDP is not active\n"); - hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; - hw->port_info->is_sw_lldp = true; - } - - if (port_info->dcbx_status == ICE_DCBX_STATUS_DIS) - dev_info(&pf->pdev->dev, "DCBX disabled\n"); - - /* LLDP disabled in FW */ - if (port_info->is_sw_lldp) { - sw_default = 1; - dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n"); - clear_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); - } else { - set_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); + err = ice_init_dcb(hw, false); + if (err && !port_info->is_sw_lldp) { + dev_err(dev, "Error initializing DCB %d\n", err); + goto dcb_init_err; } - if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) - dev_info(&pf->pdev->dev, "DCBX not started\n"); + dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n", + pf->hw.func_caps.common_cap.maxtc); + if (err) { + struct ice_vsi *pf_vsi; - if (sw_default) { - err = ice_dcb_sw_dflt_cfg(pf, locked); + /* FW LLDP is disabled, activate SW DCBX/LLDP mode */ + dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n"); + clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); + err = ice_dcb_sw_dflt_cfg(pf, true, locked); if (err) { - dev_err(&pf->pdev->dev, - "Failed to set local DCB config %d\n", err); + dev_err(dev, "Failed to set local DCB config %d\n", + err); err = -EIO; goto dcb_init_err; } + /* If the FW DCBX engine is not running then Rx LLDP packets + * need to be redirected up the stack. + */ + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_err(dev, "Failed to set local DCB config\n"); + err = -EIO; + goto dcb_init_err; + } + + ice_cfg_sw_lldp(pf_vsi, false, true); + pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; - set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - set_bit(ICE_FLAG_DCB_ENA, pf->flags); return 0; } + set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); + /* DCBX in FW and LLDP enabled in FW */ pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE; - set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - err = ice_dcb_init_cfg(pf, locked); if (err) goto dcb_init_err; - dev_info(&pf->pdev->dev, "DCBX offload supported\n"); return err; dcb_init_err: @@ -432,30 +633,31 @@ void ice_update_dcb_stats(struct ice_pf *pf) { struct ice_hw_port_stats *prev_ps, *cur_ps; struct ice_hw *hw = &pf->hw; - u8 pf_id = hw->pf_id; + u8 port; int i; + port = hw->port_info->lport; prev_ps = &pf->stats_prev; cur_ps = &pf->stats; for (i = 0; i < 8; i++) { - ice_stat_update32(hw, GLPRT_PXOFFRXC(pf_id, i), + ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i), pf->stat_prev_loaded, &prev_ps->priority_xoff_rx[i], &cur_ps->priority_xoff_rx[i]); - ice_stat_update32(hw, GLPRT_PXONRXC(pf_id, i), + ice_stat_update32(hw, GLPRT_PXONRXC(port, i), pf->stat_prev_loaded, &prev_ps->priority_xon_rx[i], &cur_ps->priority_xon_rx[i]); - ice_stat_update32(hw, GLPRT_PXONTXC(pf_id, i), + ice_stat_update32(hw, GLPRT_PXONTXC(port, i), pf->stat_prev_loaded, &prev_ps->priority_xon_tx[i], &cur_ps->priority_xon_tx[i]); - ice_stat_update32(hw, GLPRT_PXOFFTXC(pf_id, i), + ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i), pf->stat_prev_loaded, &prev_ps->priority_xoff_tx[i], &cur_ps->priority_xoff_tx[i]); - ice_stat_update32(hw, GLPRT_RXON2OFFCNT(pf_id, i), + ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i), pf->stat_prev_loaded, &prev_ps->priority_xon_2_xoff[i], &cur_ps->priority_xon_2_xoff[i]); @@ -502,55 +704,6 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, } /** - * ice_dcb_need_recfg - Check if DCB needs reconfig - * @pf: board private structure - * @old_cfg: current DCB config - * @new_cfg: new DCB config - */ -static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, - struct ice_dcbx_cfg *new_cfg) -{ - bool need_reconfig = false; - - /* Check if ETS configuration has changed */ - if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, - sizeof(new_cfg->etscfg))) { - /* If Priority Table has changed reconfig is needed */ - if (memcmp(&new_cfg->etscfg.prio_table, - &old_cfg->etscfg.prio_table, - sizeof(new_cfg->etscfg.prio_table))) { - need_reconfig = true; - dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n"); - } - - if (memcmp(&new_cfg->etscfg.tcbwtable, - &old_cfg->etscfg.tcbwtable, - sizeof(new_cfg->etscfg.tcbwtable))) - dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n"); - - if (memcmp(&new_cfg->etscfg.tsatable, - &old_cfg->etscfg.tsatable, - sizeof(new_cfg->etscfg.tsatable))) - dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n"); - } - - /* Check if PFC configuration has changed */ - if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { - need_reconfig = true; - dev_dbg(&pf->pdev->dev, "PFC config change detected.\n"); - } - - /* Check if APP Table has changed */ - if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { - need_reconfig = true; - dev_dbg(&pf->pdev->dev, "APP Table change detected.\n"); - } - - dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig); - return need_reconfig; -} - -/** * ice_dcb_process_lldp_set_mib_change - Process MIB change * @pf: ptr to ice_pf * @event: pointer to the admin queue receive event @@ -560,10 +713,12 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { struct ice_aqc_port_ets_elem buf = { 0 }; + struct device *dev = ice_pf_to_dev(pf); struct ice_aqc_lldp_get_mib *mib; struct ice_dcbx_cfg tmp_dcbx_cfg; bool need_reconfig = false; struct ice_port_info *pi; + struct ice_vsi *pf_vsi; u8 type; int ret; @@ -572,8 +727,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, return; if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) { - dev_dbg(&pf->pdev->dev, - "MIB Change Event in HOST mode\n"); + dev_dbg(dev, "MIB Change Event in HOST mode\n"); return; } @@ -582,25 +736,26 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* Ignore if event is not for Nearest Bridge */ type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) & ICE_AQ_LLDP_BRID_TYPE_M); - dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type); + dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", type); if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) return; /* Check MIB Type and return if event for Remote MIB update */ type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M; - dev_dbg(&pf->pdev->dev, - "LLDP event mib type %s\n", type ? "remote" : "local"); + dev_dbg(dev, "LLDP event mib type %s\n", type ? "remote" : "local"); if (type == ICE_AQ_LLDP_MIB_REMOTE) { /* Update the remote cached instance and return */ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, &pi->remote_dcbx_cfg); if (ret) { - dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n"); + dev_err(dev, "Failed to get remote DCB config\n"); return; } } + mutex_lock(&pf->tc_mutex); + /* store the old configuration */ tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg; @@ -610,44 +765,52 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* Get updated DCBX data from firmware */ ret = ice_get_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Failed to get DCB config\n"); - return; + dev_err(dev, "Failed to get DCB config\n"); + goto out; } /* No change detected in DCBX configs */ if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { - dev_dbg(&pf->pdev->dev, - "No change detected in DCBX configuration.\n"); - return; + dev_dbg(dev, "No change detected in DCBX configuration.\n"); + goto out; } need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); + ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); if (!need_reconfig) - return; + goto out; /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { - dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { - dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_dbg(dev, "PF VSI doesn't exist\n"); + goto out; + } + rtnl_lock(); - ice_pf_dis_all_vsi(pf, true); + ice_dis_vsi(pf_vsi, true); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); - rtnl_unlock(); - return; + dev_err(dev, "Query Port ETS failed\n"); + goto unlock_rtnl; } /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); - ice_pf_ena_all_vsi(pf, true); + ice_ena_vsi(pf_vsi, true); +unlock_rtnl: rtnl_unlock(); +out: + mutex_unlock(&pf->tc_mutex); } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 819081053ff5..f15e5776f287 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -5,14 +5,22 @@ #define _ICE_DCB_LIB_H_ #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" #ifdef CONFIG_DCB -#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */ +#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */ +#define ICE_DCB_HW_CHG_RST 0 /* DCB configuration changed with reset */ +#define ICE_DCB_NO_HW_CHG 1 /* DCB configuration did not change */ +#define ICE_DCB_HW_CHG 2 /* DCB configuration changed, no reset */ void ice_dcb_rebuild(struct ice_pf *pf); u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); +u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index); +int +ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked); +void ice_pf_dcb_recfg(struct ice_pf *pf); void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); @@ -22,6 +30,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { @@ -40,10 +49,25 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) return 1; } +static inline u8 +ice_dcb_get_tc(struct ice_vsi __always_unused *vsi, + int __always_unused queue_index) +{ + return 0; +} + static inline int ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked) { - dev_dbg(&pf->pdev->dev, "DCB not supported\n"); + dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n"); + return -EOPNOTSUPP; +} + +static inline int +ice_pf_dcb_cfg(struct ice_pf __always_unused *pf, + struct ice_dcbx_cfg __always_unused *new_cfg, + bool __always_unused locked) +{ return -EOPNOTSUPP; } @@ -55,8 +79,10 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring, } #define ice_update_dcb_stats(pf) do {} while (0) +#define ice_pf_dcb_recfg(pf) do {} while (0) #define ice_vsi_cfg_dcb_rings(vsi) do {} while (0) #define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0) #define ice_set_cgd_num(tlan_ctx, ring) do {} while (0) +#define ice_vsi_cfg_netdev_tc(vsi, ena_tc) do {} while (0) #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c new file mode 100644 index 000000000000..b61aba428adb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -0,0 +1,931 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice.h" +#include "ice_dcb.h" +#include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" +#include <net/dcbnl.h> + +#define ICE_APP_PROT_ID_ROCE 0x8915 + +/** + * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info + * @netdev: device associated with interface that needs reset + */ +static void ice_dcbnl_devreset(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + while (ice_is_reset_in_progress(pf->state)) + usleep_range(1000, 2000); + + set_bit(__ICE_DCBNL_DEVRESET, pf->state); + dev_close(netdev); + netdev_state_change(netdev); + dev_open(netdev, NULL); + netdev_state_change(netdev); + clear_bit(__ICE_DCBNL_DEVRESET, pf->state); +} + +/** + * ice_dcbnl_getets - retrieve local ETS configuration + * @netdev: the relevant netdev + * @ets: struct to hold ETS configuration + */ +static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets) +{ + struct ice_dcbx_cfg *dcbxcfg; + struct ice_port_info *pi; + struct ice_pf *pf; + + pf = ice_netdev_to_pf(netdev); + pi = pf->hw.port_info; + dcbxcfg = &pi->local_dcbx_cfg; + + ets->willing = dcbxcfg->etscfg.willing; + ets->ets_cap = dcbxcfg->etscfg.maxtcs; + ets->cbs = dcbxcfg->etscfg.cbs; + memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw)); + memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw)); + memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa)); + memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc)); + memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable, + sizeof(ets->tc_reco_bw)); + memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable, + sizeof(ets->tc_reco_tsa)); + memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table, + sizeof(ets->reco_prio_tc)); + + return 0; +} + +/** + * ice_dcbnl_setets - set IEEE ETS configuration + * @netdev: pointer to relevant netdev + * @ets: struct to hold ETS configuration + */ +static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int bwcfg = 0, bwrec = 0; + int err, i, max_tc = 0; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + mutex_lock(&pf->tc_mutex); + + new_cfg->etscfg.willing = ets->willing; + new_cfg->etscfg.cbs = ets->cbs; + ice_for_each_traffic_class(i) { + new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i]; + bwcfg += ets->tc_tx_bw[i]; + new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i]; + new_cfg->etscfg.prio_table[i] = ets->prio_tc[i]; + if (ets->prio_tc[i] > max_tc) + max_tc = ets->prio_tc[i]; + new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i]; + bwrec += ets->tc_reco_bw[i]; + new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i]; + new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i]; + } + + /* max_tc is a 1-8 value count of number of TC's, not a 0-7 value + * for the TC's index number. Add one to value if not zero, and + * for zero set it to the FW's default value + */ + if (max_tc) + max_tc++; + else + max_tc = IEEE_8021QAZ_MAX_TCS; + + new_cfg->etscfg.maxtcs = max_tc; + + if (!bwcfg) + new_cfg->etscfg.tcbwtable[0] = 100; + + if (!bwrec) + new_cfg->etsrec.tcbwtable[0] = 100; + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (err == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (err == ICE_DCB_NO_HW_CHG) + err = ICE_DCB_HW_CHG_RST; + + mutex_unlock(&pf->tc_mutex); + return err; +} + +/** + * ice_dcbnl_getnumtcs - Get max number of traffic classes supported + * @dev: pointer to netdev struct + * @tcid: TC ID + * @num: total number of TCs supported by the adapter + * + * Return the total number of TCs supported + */ +static int +ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num) +{ + struct ice_pf *pf = ice_netdev_to_pf(dev); + + if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) + return -EINVAL; + + *num = IEEE_8021QAZ_MAX_TCS; + return 0; +} + +/** + * ice_dcbnl_getdcbx - retrieve current DCBX capability + * @netdev: pointer to the netdev struct + */ +static u8 ice_dcbnl_getdcbx(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + return pf->dcbx_cap; +} + +/** + * ice_dcbnl_setdcbx - set required DCBX capability + * @netdev: the corresponding netdev + * @mode: required mode + */ +static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + /* No support for LLD_MANAGED modes or CEE+IEEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) || + !(mode & DCB_CAP_DCBX_HOST)) + return ICE_DCB_NO_HW_CHG; + + /* Already set to the given mode no change */ + if (mode == pf->dcbx_cap) + return ICE_DCB_NO_HW_CHG; + + pf->dcbx_cap = mode; + if (mode & DCB_CAP_DCBX_VER_CEE) + pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; + else + pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; + + dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); + return ICE_DCB_HW_CHG_RST; +} + +/** + * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX + * @netdev: pointer to netdev struct + * @perm_addr: buffer to return permanent MAC address + */ +static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + int i, j; + + memset(perm_addr, 0xff, MAX_ADDR_LEN); + + for (i = 0; i < netdev->addr_len; i++) + perm_addr[i] = pi->mac.perm_addr[i]; + + for (j = 0; j < netdev->addr_len; j++, i++) + perm_addr[i] = pi->mac.perm_addr[j]; +} + +/** + * ice_get_pfc_delay - Retrieve PFC Link Delay + * @hw: pointer to HW struct + * @delay: holds the PFC Link Delay value + */ +static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay) +{ + u32 val; + + val = rd32(hw, PRTDCB_GENC); + *delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S); +} + +/** + * ice_dcbnl_getpfc - retrieve local IEEE PFC config + * @netdev: pointer to netdev struct + * @pfc: struct to hold PFC info + */ +static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + struct ice_dcbx_cfg *dcbxcfg; + int i; + + dcbxcfg = &pi->local_dcbx_cfg; + pfc->pfc_cap = dcbxcfg->pfc.pfccap; + pfc->pfc_en = dcbxcfg->pfc.pfcena; + pfc->mbc = dcbxcfg->pfc.mbc; + ice_get_pfc_delay(&pf->hw, &pfc->delay); + + ice_for_each_traffic_class(i) { + pfc->requests[i] = pf->stats.priority_xoff_tx[i]; + pfc->indications[i] = pf->stats.priority_xoff_rx[i]; + } + + return 0; +} + +/** + * ice_dcbnl_setpfc - set local IEEE PFC config + * @netdev: pointer to relevant netdev + * @pfc: pointer to struct holding PFC config + */ +static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int err; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + if (pfc->pfc_cap) + new_cfg->pfc.pfccap = pfc->pfc_cap; + else + new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc; + + new_cfg->pfc.pfcena = pfc->pfc_en; + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + if (err == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (err == ICE_DCB_NO_HW_CHG) + err = ICE_DCB_HW_CHG_RST; + mutex_unlock(&pf->tc_mutex); + return err; +} + +/** + * ice_dcbnl_get_pfc_cfg - Get CEE PFC config + * @netdev: pointer to netdev struct + * @prio: corresponding user priority + * @setting: the PFC setting for given priority + */ +static void +ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1; + dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n", + prio, *setting, pi->local_dcbx_cfg.pfc.pfcena); +} + +/** + * ice_dcbnl_set_pfc_cfg - Set CEE PFC config + * @netdev: the corresponding netdev + * @prio: User Priority + * @set: PFC setting to apply + */ +static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc; + if (set) + new_cfg->pfc.pfcena |= BIT(prio); + else + new_cfg->pfc.pfcena &= ~BIT(prio); + + dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n", + prio, set, new_cfg->pfc.pfcena); +} + +/** + * ice_dcbnl_getpfcstate - get CEE PFC mode + * @netdev: pointer to netdev struct + */ +static u8 ice_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + /* Return enabled if any UP enabled for PFC */ + if (pi->local_dcbx_cfg.pfc.pfcena) + return 1; + + return 0; +} + +/** + * ice_dcbnl_getstate - get DCB enabled state + * @netdev: pointer to netdev struct + */ +static u8 ice_dcbnl_getstate(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + u8 state = 0; + + state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); + + dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state); + return state; +} + +/** + * ice_dcbnl_setstate - Set CEE DCB state + * @netdev: pointer to relevant netdev + * @state: state value to set + */ +static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return ICE_DCB_NO_HW_CHG; + + /* Nothing to do */ + if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags)) + return ICE_DCB_NO_HW_CHG; + + if (state) { + set_bit(ICE_FLAG_DCB_ENA, pf->flags); + memcpy(&pf->hw.port_info->desired_dcbx_cfg, + &pf->hw.port_info->local_dcbx_cfg, + sizeof(struct ice_dcbx_cfg)); + } else { + clear_bit(ICE_FLAG_DCB_ENA, pf->flags); + } + + return ICE_DCB_HW_CHG; +} + +/** + * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config + * @netdev: pointer to netdev struct + * @prio: the corresponding user priority + * @prio_type: traffic priority type + * @pgid: the BW group ID the traffic class belongs to + * @bw_pct: BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio, + u8 __always_unused *prio_type, u8 *pgid, + u8 __always_unused *bw_pct, + u8 __always_unused *up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; + dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio, + *pgid); +} + +/** + * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config + * @netdev: pointer to relevant netdev + * @tc: the corresponding traffic class + * @prio_type: the traffic priority type + * @bwg_id: the BW group ID the TC belongs to + * @bw_pct: the BW perventage for the BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 __always_unused prio_type, + u8 __always_unused bwg_id, + u8 __always_unused bw_pct, u8 up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int i; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (tc >= ICE_MAX_TRAFFIC_CLASS) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + /* prio_type, bwg_id and bw_pct per UP are not supported */ + + ice_for_each_traffic_class(i) { + if (up_map & BIT(i)) + new_cfg->etscfg.prio_table[i] = tc; + } + new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS; +} + +/** + * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config + * @netdev: pointer to the netdev struct + * @pgid: corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + */ +static void +ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (pgid >= ICE_MAX_TRAFFIC_CLASS) + return; + + *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid]; + dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n", + pgid, *bw_pct); +} + +/** + * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config + * @netdev: the corresponding netdev + * @pgid: Correspongind traffic class + * @bw_pct: the BW percentage for the specified TC + */ +static void +ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (pgid >= ICE_MAX_TRAFFIC_CLASS) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + new_cfg->etscfg.tcbwtable[pgid] = bw_pct; +} + +/** + * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config + * @netdev: pointer to netdev struct + * @prio: the corresponding user priority + * @prio_type: the traffic priority type + * @pgid: the PG ID + * @bw_pct: the BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio, + u8 __always_unused *prio_type, u8 *pgid, + u8 __always_unused *bw_pct, + u8 __always_unused *up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; +} + +/** + * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config + * @netdev: pointer to netdev struct + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + */ +static void +ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid, + u8 *bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + *bw_pct = 0; +} + +/** + * ice_dcbnl_get_cap - Get DCBX capabilities of adapter + * @netdev: pointer to netdev struct + * @capid: the capability type + * @cap: the capability value + */ +static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))) + return ICE_DCB_NO_HW_CHG; + + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + *cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_GSP: + *cap = false; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + case DCB_CAP_ATTR_DCBX: + *cap = pf->dcbx_cap; + break; + default: + *cap = false; + break; + } + + dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n", + capid, *cap); + return 0; +} + +/** + * ice_dcbnl_getapp - get CEE APP + * @netdev: pointer to netdev struct + * @idtype: the App selector + * @id: the App ethtype or port number + */ +static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + return dcb_getapp(netdev, &app); +} + +/** + * ice_dcbnl_find_app - Search for APP in given DCB config + * @cfg: struct to hold DCBX config + * @app: struct to hold app data to look for + */ +static bool +ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg, + struct ice_dcb_app_priority_table *app) +{ + int i; + + for (i = 0; i < cfg->numapps; i++) { + if (app->selector == cfg->app[i].selector && + app->prot_id == cfg->app[i].prot_id && + app->priority == cfg->app[i].priority) + return true; + } + + return false; +} + +/** + * ice_dcbnl_setapp - set local IEEE App config + * @netdev: relevant netdev struct + * @app: struct to hold app config info + */ +static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcb_app_priority_table new_app; + struct ice_dcbx_cfg *old_cfg, *new_cfg; + int ret; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + old_cfg = &pf->hw.port_info->local_dcbx_cfg; + + if (old_cfg->numapps == ICE_DCBX_MAX_APPS) { + ret = -EINVAL; + goto setapp_out; + } + + ret = dcb_ieee_setapp(netdev, app); + if (ret) + goto setapp_out; + + new_app.selector = app->selector; + new_app.prot_id = app->protocol; + new_app.priority = app->priority; + if (ice_dcbnl_find_app(old_cfg, &new_app)) { + ret = 0; + goto setapp_out; + } + + new_cfg->app[new_cfg->numapps++] = new_app; + ret = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (ret == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (ret == ICE_DCB_NO_HW_CHG) + ret = ICE_DCB_HW_CHG_RST; + +setapp_out: + mutex_unlock(&pf->tc_mutex); + return ret; +} + +/** + * ice_dcbnl_delapp - Delete local IEEE App config + * @netdev: relevant netdev + * @app: struct to hold app too delete + * + * Will not delete first application required by the FW + */ +static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *old_cfg, *new_cfg; + int i, j, ret = 0; + + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + old_cfg = &pf->hw.port_info->local_dcbx_cfg; + + if (old_cfg->numapps <= 1) + goto delapp_out; + + ret = dcb_ieee_delapp(netdev, app); + if (ret) + goto delapp_out; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + for (i = 1; i < new_cfg->numapps; i++) { + if (app->selector == new_cfg->app[i].selector && + app->protocol == new_cfg->app[i].prot_id && + app->priority == new_cfg->app[i].priority) { + new_cfg->app[i].selector = 0; + new_cfg->app[i].prot_id = 0; + new_cfg->app[i].priority = 0; + break; + } + } + + /* Did not find DCB App */ + if (i == new_cfg->numapps) { + ret = -EINVAL; + goto delapp_out; + } + + new_cfg->numapps--; + + for (j = i; j < new_cfg->numapps; j++) { + new_cfg->app[i].selector = old_cfg->app[j + 1].selector; + new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id; + new_cfg->app[i].priority = old_cfg->app[j + 1].priority; + } + + ret = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (ret == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (ret == ICE_DCB_NO_HW_CHG) + ret = ICE_DCB_HW_CHG_RST; + +delapp_out: + mutex_unlock(&pf->tc_mutex); + return ret; +} + +/** + * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW + * @netdev: the corresponding netdev + */ +static u8 ice_dcbnl_cee_set_all(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int err; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return ICE_DCB_NO_HW_CHG; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + mutex_lock(&pf->tc_mutex); + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + + mutex_unlock(&pf->tc_mutex); + return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err; +} + +static const struct dcbnl_rtnl_ops dcbnl_ops = { + /* IEEE 802.1Qaz std */ + .ieee_getets = ice_dcbnl_getets, + .ieee_setets = ice_dcbnl_setets, + .ieee_getpfc = ice_dcbnl_getpfc, + .ieee_setpfc = ice_dcbnl_setpfc, + .ieee_setapp = ice_dcbnl_setapp, + .ieee_delapp = ice_dcbnl_delapp, + + /* CEE std */ + .getstate = ice_dcbnl_getstate, + .setstate = ice_dcbnl_setstate, + .getpermhwaddr = ice_dcbnl_get_perm_hw_addr, + .setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx, + .setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx, + .getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx, + .getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx, + .getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx, + .getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx, + .setpfccfg = ice_dcbnl_set_pfc_cfg, + .getpfccfg = ice_dcbnl_get_pfc_cfg, + .setall = ice_dcbnl_cee_set_all, + .getcap = ice_dcbnl_get_cap, + .getnumtcs = ice_dcbnl_getnumtcs, + .getpfcstate = ice_dcbnl_getpfcstate, + .getapp = ice_dcbnl_getapp, + + /* DCBX configuration */ + .getdcbx = ice_dcbnl_getdcbx, + .setdcbx = ice_dcbnl_setdcbx, +}; + +/** + * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config + * @vsi: pointer to VSI struct + */ +void ice_dcbnl_set_all(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + struct ice_dcbx_cfg *dcbxcfg; + struct ice_port_info *pi; + struct dcb_app sapp; + struct ice_pf *pf; + int i; + + if (!netdev) + return; + + pf = ice_netdev_to_pf(netdev); + pi = pf->hw.port_info; + + /* SW DCB taken care of by SW Default Config */ + if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) + return; + + /* DCB not enabled */ + if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags)) + return; + + dcbxcfg = &pi->local_dcbx_cfg; + + for (i = 0; i < dcbxcfg->numapps; i++) { + u8 prio, tc_map; + + prio = dcbxcfg->app[i].priority; + tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]); + + /* Add APP only if the TC is enabled for this VSI */ + if (tc_map & vsi->tc_cfg.ena_tc) { + sapp.selector = dcbxcfg->app[i].selector; + sapp.protocol = dcbxcfg->app[i].prot_id; + sapp.priority = prio; + dcb_ieee_setapp(netdev, &sapp); + } + } + /* Notify user-space of the changes */ + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0); +} + +/** + * ice_dcbnl_vsi_del_app - Delete APP on all VSIs + * @vsi: pointer to the main VSI + * @app: APP to delete + * + * Delete given APP from all the VSIs for given PF + */ +static void +ice_dcbnl_vsi_del_app(struct ice_vsi *vsi, + struct ice_dcb_app_priority_table *app) +{ + struct dcb_app sapp; + int err; + + sapp.selector = app->selector; + sapp.protocol = app->prot_id; + sapp.priority = app->priority; + err = ice_dcbnl_delapp(vsi->netdev, &sapp); + dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n", + vsi->idx, err, app->selector, app->prot_id, app->priority); +} + +/** + * ice_dcbnl_flush_apps - Delete all removed APPs + * @pf: the corresponding PF + * @old_cfg: old DCBX configuration data + * @new_cfg: new DCBX configuration data + * + * Find and delete all APPS that are not present in the passed + * DCB configuration + */ +void +ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) +{ + struct ice_vsi *main_vsi = ice_get_main_vsi(pf); + int i; + + if (!main_vsi) + return; + + for (i = 0; i < old_cfg->numapps; i++) { + struct ice_dcb_app_priority_table app = old_cfg->app[i]; + + /* The APP is not available anymore delete it */ + if (!ice_dcbnl_find_app(new_cfg, &app)) + ice_dcbnl_vsi_del_app(main_vsi, &app); + } +} + +/** + * ice_dcbnl_setup - setup DCBNL + * @vsi: VSI to get associated netdev from + */ +void ice_dcbnl_setup(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf; + + pf = ice_netdev_to_pf(netdev); + if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) + return; + + netdev->dcbnl_ops = &dcbnl_ops; + ice_dcbnl_set_all(vsi); +} diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h new file mode 100644 index 000000000000..6c630a362293 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_DCB_NL_H_ +#define _ICE_DCB_NL_H_ + +#ifdef CONFIG_DCB +void ice_dcbnl_setup(struct ice_vsi *vsi); +void ice_dcbnl_set_all(struct ice_vsi *vsi); +void +ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg); +#else +#define ice_dcbnl_setup(vsi) do {} while (0) +#define ice_dcbnl_set_all(vsi) do {} while (0) +#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0) +#endif /* CONFIG_DCB */ + +#endif /* _ICE_DCB_NL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index f8d5c661d0ba..ce63017c56c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -11,5 +11,23 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Connection E822-C for backplane */ +#define ICE_DEV_ID_E822C_BACKPLANE 0x1890 +/* Intel(R) Ethernet Connection E822-C for QSFP */ +#define ICE_DEV_ID_E822C_QSFP 0x1891 +/* Intel(R) Ethernet Connection E822-C for SFP */ +#define ICE_DEV_ID_E822C_SFP 0x1892 +/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893 +/* Intel(R) Ethernet Connection E822-C 1GbE */ +#define ICE_DEV_ID_E822C_SGMII 0x1894 +/* Intel(R) Ethernet Connection E822-X for backplane */ +#define ICE_DEV_ID_E822X_BACKPLANE 0x1897 +/* Intel(R) Ethernet Connection E822-L for SFP */ +#define ICE_DEV_ID_E822L_SFP 0x1898 +/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 +/* Intel(R) Ethernet Connection E822-L 1GbE */ +#define ICE_DEV_ID_E822L_SGMII 0x189A #endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 52083a63dee6..b002ab4e5838 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4,6 +4,7 @@ /* ethtool support for ice */ #include "ice.h" +#include "ice_flow.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -15,7 +16,7 @@ struct ice_stats { #define ICE_STAT(_type, _name, _stat) { \ .stat_string = _name, \ - .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .sizeof_stat = sizeof_field(_type, _stat), \ .stat_offset = offsetof(_type, _stat) \ } @@ -36,10 +37,10 @@ static int ice_q_stats_len(struct net_device *netdev) #define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats) #define ICE_PFC_STATS_LEN ( \ - (FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \ - FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \ + (sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \ + sizeof_field(struct ice_pf, stats.priority_xon_rx) + \ + sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \ + sizeof_field(struct ice_pf, stats.priority_xon_tx)) \ / sizeof(u64)) #define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \ ICE_VSI_STATS_LEN + ice_q_stats_len(n)) @@ -155,47 +156,34 @@ struct ice_priv_flag { static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), - ICE_PRIV_FLAG("enable-fw-lldp", ICE_FLAG_ENABLE_FW_LLDP), + ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT), + ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) -/** - * ice_nvm_version_str - format the NVM version strings - * @hw: ptr to the hardware info - */ -static char *ice_nvm_version_str(struct ice_hw *hw) -{ - static char buf[ICE_ETHTOOL_FWVER_LEN]; - u8 ver, patch; - u32 full_ver; - u16 build; - - full_ver = hw->nvm.oem_ver; - ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT); - build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >> - ICE_OEM_VER_BUILD_SHIFT); - patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK); - - snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", - (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT, - (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT, - hw->nvm.eetrack, ver, build, patch); - - return buf; -} - static void ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct ice_netdev_priv *np = netdev_priv(netdev); + u8 oem_ver, oem_patch, nvm_ver_hi, nvm_ver_lo; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u16 oem_build; strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); - strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw), - sizeof(drvinfo->fw_version)); + + /* Display NVM version (from which the firmware version can be + * determined) which contains more pertinent information. + */ + ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, + &nvm_ver_hi, &nvm_ver_lo); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%x.%02x 0x%x %d.%d.%d", nvm_ver_hi, nvm_ver_lo, + hw->nvm.eetrack, oem_ver, oem_build, oem_patch); + strlcpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; @@ -272,7 +260,7 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, int ret = 0; u16 *buf; - dev = &pf->pdev->dev; + dev = ice_pf_to_dev(pf); eeprom->magic = hw->vendor_id | (hw->device_id << 16); @@ -307,12 +295,15 @@ out: */ static bool ice_active_vfs(struct ice_pf *pf) { - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) return true; + } + return false; } @@ -367,6 +358,7 @@ static u64 ice_eeprom_test(struct net_device *netdev) static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) { struct ice_pf *pf = (struct ice_pf *)hw->back; + struct device *dev = ice_pf_to_dev(pf); static const u32 patterns[] = { 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF @@ -382,8 +374,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) val = rd32(hw, reg); if (val == pattern) continue; - dev_err(&pf->pdev->dev, - "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" + dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" , __func__, reg, pattern, val); return 1; } @@ -391,8 +382,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) wr32(hw, reg, orig_val); val = rd32(hw, reg); if (val != orig_val) { - dev_err(&pf->pdev->dev, - "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" + dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" , __func__, reg, orig_val, val); return 1; } @@ -531,7 +521,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size) if (!pf) return -EINVAL; - data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL); if (!data) return -ENOMEM; @@ -648,7 +638,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) continue; rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page); + received_buf = page_address(rx_buf->page) + rx_buf->page_offset; if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@ -673,9 +663,11 @@ static u64 ice_loopback_test(struct net_device *netdev) u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; LIST_HEAD(tmp_list); + struct device *dev; u8 *tx_frame; int i; + dev = ice_pf_to_dev(pf); netdev_info(netdev, "loopback test\n"); test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info); @@ -736,12 +728,12 @@ static u64 ice_loopback_test(struct net_device *netdev) ret = 10; lbtest_free_frame: - devm_kfree(&pf->pdev->dev, tx_frame); + devm_kfree(dev, tx_frame); remove_mac_filters: if (ice_remove_mac(&pf->hw, &tmp_list)) netdev_err(netdev, "Could not remove MAC filter for the test VSI"); free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_list); + ice_free_fltr_list(dev, &tmp_list); lbtest_mac_dis: /* Disable MAC loopback after the test is completed. */ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL)) @@ -798,6 +790,9 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, struct ice_netdev_priv *np = netdev_priv(netdev); bool if_running = netif_running(netdev); struct ice_pf *pf = np->vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); @@ -805,8 +800,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, set_bit(__ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { - dev_warn(&pf->pdev->dev, - "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); + dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); data[ICE_ETH_TEST_REG] = 1; data[ICE_ETH_TEST_EEPROM] = 1; data[ICE_ETH_TEST_INTR] = 1; @@ -840,8 +834,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, int status = ice_open(netdev); if (status) { - dev_err(&pf->pdev->dev, - "Could not open device %s, err %d", + dev_err(dev, "Could not open device %s, err %d", pf->int_name, status); } } @@ -986,7 +979,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec) } /* Get last SW configuration */ - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM; @@ -1031,7 +1024,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec) } done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; } @@ -1062,7 +1055,7 @@ ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) fec = ICE_FEC_NONE; break; default: - dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n", + dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n", fecparam->fec); return -EINVAL; } @@ -1107,7 +1100,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) break; } - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM; @@ -1134,7 +1127,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) fecparam->fec |= ETHTOOL_FEC_OFF; done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; } @@ -1179,12 +1172,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; int ret = 0; u32 i; if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE)) return -EINVAL; + dev = ice_pf_to_dev(pf); set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS); @@ -1201,40 +1196,33 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS); - if (test_bit(ICE_FLAG_ENABLE_FW_LLDP, change_flags)) { - if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) { + if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) { + if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) { enum ice_status status; /* Disable FW LLDP engine */ - status = ice_aq_cfg_lldp_mib_change(&pf->hw, false, - NULL); + status = ice_cfg_lldp_mib_change(&pf->hw, false); + /* If unregistering for LLDP events fails, this is * not an error state, as there shouldn't be any * events to respond to. */ if (status) - dev_info(&pf->pdev->dev, - "Failed to unreg for LLDP events\n"); + dev_info(dev, "Failed to unreg for LLDP events\n"); /* The AQ call to stop the FW LLDP agent will generate * an error if the agent is already stopped. */ status = ice_aq_stop_lldp(&pf->hw, true, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to stop LLDP agent\n"); + dev_warn(dev, "Fail to stop LLDP agent\n"); /* Use case for having the FW LLDP agent stopped * will likely not need DCB, so failure to init is * not a concern of ethtool */ status = ice_init_pf_dcb(pf, true); if (status) - dev_warn(&pf->pdev->dev, "Fail to init DCB\n"); - - /* Forward LLDP packets to default VSI so that they - * are passed up the stack - */ - ice_cfg_sw_lldp(vsi, false, true); + dev_warn(dev, "Fail to init DCB\n"); } else { enum ice_status status; bool dcbx_agent_status; @@ -1244,8 +1232,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) */ status = ice_aq_start_lldp(&pf->hw, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to start LLDP Agent\n"); + dev_warn(dev, "Fail to start LLDP Agent\n"); /* AQ command to start FW DCBX agent will fail if * the agent is already started @@ -1254,10 +1241,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) &dcbx_agent_status, NULL); if (status) - dev_dbg(&pf->pdev->dev, - "Failed to start FW DCBX\n"); + dev_dbg(dev, "Failed to start FW DCBX\n"); - dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n", + dev_info(dev, "FW DCBX agent is %s\n", dcbx_agent_status ? "ACTIVE" : "DISABLED"); /* Failure to configure MIB change or init DCB is not @@ -1267,14 +1253,24 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) */ status = ice_init_pf_dcb(pf, true); if (status) - dev_dbg(&pf->pdev->dev, "Fail to init DCB\n"); + dev_dbg(dev, "Fail to init DCB\n"); /* Remove rule to direct LLDP packets to default VSI. * The FW LLDP engine will now be consuming them. */ ice_cfg_sw_lldp(vsi, false, false); + + /* Register for MIB change events */ + status = ice_cfg_lldp_mib_change(&pf->hw, true); + if (status) + dev_dbg(dev, "Fail to enable MIB change events\n"); } } + if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { + /* down and up VSI so that changes of Rx cfg are reflected. */ + ice_down(vsi); + ice_up(vsi); + } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); return ret; } @@ -1319,14 +1315,17 @@ ice_get_ethtool_stats(struct net_device *netdev, struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_ring *ring; - unsigned int j = 0; + unsigned int j; int i = 0; char *p; + ice_update_pf_stats(pf); + ice_update_vsi_stats(vsi); + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == - sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } /* populate per queue stats */ @@ -1716,291 +1715,14 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks, struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ethtool_link_ksettings cap_ksettings; + struct ice_port_info *pi = np->vsi->port_info; struct ice_link_status *link_info; struct ice_vsi *vsi = np->vsi; - bool unrecog_phy_high = false; - bool unrecog_phy_low = false; link_info = &vsi->port_info->phy.link_info; - /* Initialize supported and advertised settings based on PHY settings */ - switch (link_info->phy_type_low) { - case ICE_PHY_TYPE_LOW_100BASE_TX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100baseT_Full); - break; - case ICE_PHY_TYPE_LOW_100M_SGMII: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 1000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1G_SGMII: - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_SX: - case ICE_PHY_TYPE_LOW_1000BASE_LX: - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseX_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_KX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseKX_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 1000baseKX_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 2500baseT_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_X: - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_KX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 2500baseX_Full); - break; - case ICE_PHY_TYPE_LOW_5GBASE_T: - case ICE_PHY_TYPE_LOW_5GBASE_KR: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 5000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 5000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 10000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10G_SFI_DA: - case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: - case ICE_PHY_TYPE_LOW_10G_SFI_C2C: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_SR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseSR_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseLR_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseKR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 10000baseKR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_T: - case ICE_PHY_TYPE_LOW_25GBASE_CR: - case ICE_PHY_TYPE_LOW_25GBASE_CR_S: - case ICE_PHY_TYPE_LOW_25GBASE_CR1: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 25000baseCR_Full); - break; - case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC: - case ICE_PHY_TYPE_LOW_25G_AUI_C2C: - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_SR: - case ICE_PHY_TYPE_LOW_25GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseSR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_KR: - case ICE_PHY_TYPE_LOW_25GBASE_KR1: - case ICE_PHY_TYPE_LOW_25GBASE_KR_S: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseKR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 25000baseKR_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_CR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 40000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC: - case ICE_PHY_TYPE_LOW_40G_XLAUI: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_SR4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseSR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_LR4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseLR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_KR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 40000baseKR4_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_CR2: - case ICE_PHY_TYPE_LOW_50GBASE_CP: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 50000baseCR2_Full); - break; - case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_LAUI2: - case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_AUI2: - case ICE_PHY_TYPE_LOW_50GBASE_SR: - case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_AUI1: - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_KR2: - case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseKR2_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 50000baseKR2_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_SR2: - case ICE_PHY_TYPE_LOW_50GBASE_LR2: - case ICE_PHY_TYPE_LOW_50GBASE_FR: - case ICE_PHY_TYPE_LOW_50GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseSR2_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_CR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: - case ICE_PHY_TYPE_LOW_100G_CAUI4: - case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: - case ICE_PHY_TYPE_LOW_100G_AUI4: - case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_CP2: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_SR4: - case ICE_PHY_TYPE_LOW_100GBASE_SR2: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_LR4: - case ICE_PHY_TYPE_LOW_100GBASE_DR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseLR4_ER4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_KR4: - case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseKR4_Full); - break; - default: - unrecog_phy_low = true; - } - - switch (link_info->phy_type_high) { - case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseKR4_Full); - break; - case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: - case ICE_PHY_TYPE_HIGH_100G_CAUI2: - case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: - case ICE_PHY_TYPE_HIGH_100G_AUI2: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - break; - default: - unrecog_phy_high = true; - } - - if (unrecog_phy_low && unrecog_phy_high) { - /* if we got here and link is up something bad is afoot */ - netdev_info(netdev, - "WARNING: Unrecognized PHY_Low (0x%llx).\n", - (u64)link_info->phy_type_low); - netdev_info(netdev, - "WARNING: Unrecognized PHY_High (0x%llx).\n", - (u64)link_info->phy_type_high); - } - - /* Now that we've worked out everything that could be supported by the - * current PHY type, get what is supported by the NVM and intersect - * them to get what is truly supported - */ - memset(&cap_ksettings, 0, sizeof(cap_ksettings)); - ice_phy_type_to_ethtool(netdev, &cap_ksettings); - ethtool_intersect_link_masks(ks, &cap_ksettings); + /* Get supported and advertised settings from PHY ability with media */ + ice_phy_type_to_ethtool(netdev, ks); switch (link_info->link_speed) { case ICE_AQ_LINK_SPEED_100GB: @@ -2034,12 +1756,38 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks, ks->base.speed = SPEED_100; break; default: - netdev_info(netdev, - "WARNING: Unrecognized link_speed (0x%x).\n", + netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n", link_info->link_speed); break; } ks->base.duplex = DUPLEX_FULL; + + if (link_info->an_info & ICE_AQ_AN_COMPLETED) + ethtool_link_ksettings_add_link_mode(ks, lp_advertising, + Autoneg); + + /* Set flow control negotiated Rx/Tx pause */ + switch (pi->fc.current_mode) { + case ICE_FC_FULL: + ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause); + break; + case ICE_FC_TX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause); + ethtool_link_ksettings_add_link_mode(ks, lp_advertising, + Asym_Pause); + break; + case ICE_FC_RX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, lp_advertising, + Asym_Pause); + break; + case ICE_FC_PFC: + /* fall through */ + default: + ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause); + ethtool_link_ksettings_del_link_mode(ks, lp_advertising, + Asym_Pause); + break; + } } /** @@ -2078,9 +1826,12 @@ ice_get_link_ksettings(struct net_device *netdev, struct ice_aqc_get_phy_caps_data *caps; struct ice_link_status *hw_link_info; struct ice_vsi *vsi = np->vsi; + enum ice_status status; + int err = 0; ethtool_link_ksettings_zero_link_mode(ks, supported); ethtool_link_ksettings_zero_link_mode(ks, advertising); + ethtool_link_ksettings_zero_link_mode(ks, lp_advertising); hw_link_info = &vsi->port_info->phy.link_info; /* set speed and duplex */ @@ -2125,48 +1876,36 @@ ice_get_link_ksettings(struct net_device *netdev, /* flow control is symmetric and always supported */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); - switch (vsi->port_info->fc.req_mode) { - case ICE_FC_FULL: + caps = kzalloc(sizeof(*caps), GFP_KERNEL); + if (!caps) + return -ENOMEM; + + status = ice_aq_get_phy_caps(vsi->port_info, false, + ICE_AQC_REPORT_SW_CFG, caps, NULL); + if (status) { + err = -EIO; + goto done; + } + + /* Set the advertised flow control based on the PHY capability */ + if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) && + (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) { ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); - break; - case ICE_FC_TX_PAUSE: ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); - break; - case ICE_FC_RX_PAUSE: + } else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) { + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); + } else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) { ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); - break; - case ICE_FC_PFC: - default: + } else { ethtool_link_ksettings_del_link_mode(ks, advertising, Pause); ethtool_link_ksettings_del_link_mode(ks, advertising, Asym_Pause); - break; } - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); - if (!caps) - goto done; - - if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_TOPO_CAP, - caps, NULL)) - netdev_info(netdev, "Get phy capability failed.\n"); - - /* Set supported FEC modes based on PHY capability */ - ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); - - if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || - caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN) - ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); - if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) - ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); - - if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_SW_CFG, - caps, NULL)) - netdev_info(netdev, "Get phy capability failed.\n"); - /* Set advertised FEC modes based on PHY capability */ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); @@ -2178,9 +1917,25 @@ ice_get_link_ksettings(struct net_device *netdev, caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ) ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + status = ice_aq_get_phy_caps(vsi->port_info, false, + ICE_AQC_REPORT_TOPO_CAP, caps, NULL); + if (status) { + err = -EIO; + goto done; + } + + /* Set supported FEC modes based on PHY capability */ + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + done: - devm_kfree(&vsi->back->pdev->dev, caps); - return 0; + kfree(caps); + return err; } /** @@ -2408,8 +2163,7 @@ ice_set_link_ksettings(struct net_device *netdev, usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX); } - abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities), - GFP_KERNEL); + abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); if (!abilities) return -ENOMEM; @@ -2501,13 +2255,250 @@ ice_set_link_ksettings(struct net_device *netdev, } done: - devm_kfree(&pf->pdev->dev, abilities); + kfree(abilities); clear_bit(__ICE_CFG_BUSY, pf->state); return err; } /** + * ice_parse_hdrs - parses headers from RSS hash input + * @nfc: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended + * header types for RSS configuration + */ +static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc) +{ + u32 hdrs = ICE_FLOW_SEG_HDR_NONE; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4; + break; + case UDP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4; + break; + case SCTP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4; + break; + case TCP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6; + break; + case UDP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6; + break; + case SCTP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6; + break; + default: + break; + } + return hdrs; +} + +#define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) +#define ICE_FLOW_HASH_FLD_IPV6_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) +#define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) +#define ICE_FLOW_HASH_FLD_IPV6_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) +#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_TCP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT) +#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_UDP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT) +#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT) + +/** + * ice_parse_hash_flds - parses hash fields from RSS hash input + * @nfc: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended + * hash fields for RSS configuration + */ +static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc) +{ + u64 hfld = ICE_HASH_INVALID; + + if (nfc->data & RXH_IP_SRC || nfc->data & RXH_IP_DST) { + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + if (nfc->data & RXH_IP_SRC) + hfld |= ICE_FLOW_HASH_FLD_IPV4_SA; + if (nfc->data & RXH_IP_DST) + hfld |= ICE_FLOW_HASH_FLD_IPV4_DA; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + if (nfc->data & RXH_IP_SRC) + hfld |= ICE_FLOW_HASH_FLD_IPV6_SA; + if (nfc->data & RXH_IP_DST) + hfld |= ICE_FLOW_HASH_FLD_IPV6_DA; + break; + default: + break; + } + } + + if (nfc->data & RXH_L4_B_0_1 || nfc->data & RXH_L4_B_2_3) { + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_TCP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_TCP_DST_PORT; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_UDP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_UDP_DST_PORT; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_SCTP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_SCTP_DST_PORT; + break; + default: + break; + } + } + + return hfld; +} + +/** + * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash + * @vsi: the VSI being configured + * @nfc: ethtool rxnfc command + * + * Returns Success if the flow input set is supported. + */ +static int +ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + u64 hashed_flds; + u32 hdrs; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + hashed_flds = ice_parse_hash_flds(nfc); + if (hashed_flds == ICE_HASH_INVALID) { + dev_dbg(dev, "Invalid hash fields, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + hdrs = ice_parse_hdrs(nfc); + if (hdrs == ICE_FLOW_SEG_HDR_NONE) { + dev_dbg(dev, "Header type is not valid, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs); + if (status) { + dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n", + vsi->vsi_num, status); + return -EINVAL; + } + + return 0; +} + +/** + * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type + * @vsi: the VSI being configured + * @nfc: ethtool rxnfc command + */ +static void +ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + u64 hash_flds; + u32 hdrs; + + dev = ice_pf_to_dev(pf); + + nfc->data = 0; + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi->vsi_num); + return; + } + + hdrs = ice_parse_hdrs(nfc); + if (hdrs == ICE_FLOW_SEG_HDR_NONE) { + dev_dbg(dev, "Header type is not valid, vsi num = %d\n", + vsi->vsi_num); + return; + } + + hash_flds = ice_get_rss_cfg(&pf->hw, vsi->idx, hdrs); + if (hash_flds == ICE_HASH_INVALID) { + dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n", + vsi->vsi_num); + return; + } + + if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_SA || + hash_flds & ICE_FLOW_HASH_FLD_IPV6_SA) + nfc->data |= (u64)RXH_IP_SRC; + + if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_DA || + hash_flds & ICE_FLOW_HASH_FLD_IPV6_DA) + nfc->data |= (u64)RXH_IP_DST; + + if (hash_flds & ICE_FLOW_HASH_FLD_TCP_SRC_PORT || + hash_flds & ICE_FLOW_HASH_FLD_UDP_SRC_PORT || + hash_flds & ICE_FLOW_HASH_FLD_SCTP_SRC_PORT) + nfc->data |= (u64)RXH_L4_B_0_1; + + if (hash_flds & ICE_FLOW_HASH_FLD_TCP_DST_PORT || + hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT || + hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT) + nfc->data |= (u64)RXH_L4_B_2_3; +} + +/** + * ice_set_rxnfc - command to set Rx flow rules. + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * + * Returns 0 for success and negative values for errors + */ +static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return ice_set_rss_hash_opt(vsi, cmd); + default: + break; + } + return -EOPNOTSUPP; +} + +/** * ice_get_rxnfc - command to get Rx flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -2528,6 +2519,10 @@ ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, cmd->data = vsi->rss_size; ret = 0; break; + case ETHTOOL_GRXFH: + ice_get_rss_hash_opt(vsi, cmd); + ret = 0; + break; default: break; } @@ -2558,6 +2553,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ice_ring *tx_rings = NULL, *rx_rings = NULL; struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_ring *xdp_rings = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; @@ -2576,13 +2572,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); if (new_tx_cnt != ring->tx_pending) - netdev_info(netdev, - "Requested Tx descriptor count rounded up to %d\n", + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", new_tx_cnt); new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE); if (new_rx_cnt != ring->rx_pending) - netdev_info(netdev, - "Requested Rx descriptor count rounded up to %d\n", + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", new_rx_cnt); /* if nothing to do return success */ @@ -2592,6 +2586,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) return 0; } + /* If there is a AF_XDP UMEM attached to any of Rx rings, + * disallow changing the number of descriptors -- regardless + * if the netdev is running or not. + */ + if (ice_xsk_any_rx_ring_ena(vsi)) + return -EBUSY; + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) @@ -2605,6 +2606,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) vsi->tx_rings[i]->count = new_tx_cnt; for (i = 0; i < vsi->alloc_rxq; i++) vsi->rx_rings[i]->count = new_rx_cnt; + if (ice_is_xdp_ena_vsi(vsi)) + for (i = 0; i < vsi->num_xdp_txq; i++) + vsi->xdp_rings[i]->count = new_tx_cnt; + vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -2616,14 +2622,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n", vsi->tx_rings[0]->count, new_tx_cnt); - tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, - sizeof(*tx_rings), GFP_KERNEL); + tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL); if (!tx_rings) { err = -ENOMEM; goto done; } - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@ -2631,15 +2636,42 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) tx_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&tx_rings[i]); if (err) { - while (i) { - i--; + while (i--) ice_clean_tx_ring(&tx_rings[i]); - } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); goto done; } } + if (!ice_is_xdp_ena_vsi(vsi)) + goto process_rx; + + /* alloc updated XDP resources */ + netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n", + vsi->xdp_rings[0]->count, new_tx_cnt); + + xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL); + if (!xdp_rings) { + err = -ENOMEM; + goto free_tx; + } + + for (i = 0; i < vsi->num_xdp_txq; i++) { + /* clone ring and setup updated count */ + xdp_rings[i] = *vsi->xdp_rings[i]; + xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].desc = NULL; + xdp_rings[i].tx_buf = NULL; + err = ice_setup_tx_ring(&xdp_rings[i]); + if (err) { + while (i--) + ice_clean_tx_ring(&xdp_rings[i]); + kfree(xdp_rings); + goto free_tx; + } + ice_set_ring_xdp(&xdp_rings[i]); + } + process_rx: if (new_rx_cnt == vsi->rx_rings[0]->count) goto process_link; @@ -2648,14 +2680,13 @@ process_rx: netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", vsi->rx_rings[0]->count, new_rx_cnt); - rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, - sizeof(*rx_rings), GFP_KERNEL); + rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL); if (!rx_rings) { err = -ENOMEM; goto done; } - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; @@ -2679,7 +2710,7 @@ rx_unwind: i--; ice_free_rx_ring(&rx_rings[i]); } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); err = -ENOMEM; goto free_tx; } @@ -2693,15 +2724,15 @@ process_link: ice_down(vsi); if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { ice_free_tx_ring(vsi->tx_rings[i]); *vsi->tx_rings[i] = tx_rings[i]; } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); } if (rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { ice_free_rx_ring(vsi->rx_rings[i]); /* copy the real tail offset */ rx_rings[i].tail = vsi->rx_rings[i]->tail; @@ -2715,9 +2746,19 @@ process_link: rx_rings[i].next_to_alloc = 0; *vsi->rx_rings[i] = rx_rings[i]; } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); + } + + if (xdp_rings) { + for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_free_tx_ring(vsi->xdp_rings[i]); + *vsi->xdp_rings[i] = xdp_rings[i]; + } + kfree(xdp_rings); } + vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; ice_up(vsi); } goto done; @@ -2725,9 +2766,9 @@ process_link: free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) + ice_for_each_txq(vsi, i) ice_free_tx_ring(&tx_rings[i]); - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); } done: @@ -2763,6 +2804,11 @@ static int ice_nway_reset(struct net_device *netdev) * ice_get_pauseparam - Get Flow Control status * @netdev: network interface device structure * @pause: ethernet pause (flow control) parameters + * + * Get requested flow control status from PHY capability. + * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which + * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report + * the negotiated Rx/Tx pause via lp_advertising. */ static void ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) @@ -2770,7 +2816,6 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_info *pi = np->vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_vsi *vsi = np->vsi; struct ice_dcbx_cfg *dcbx_cfg; enum ice_status status; @@ -2780,8 +2825,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) dcbx_cfg = &pi->local_dcbx_cfg; - pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps), - GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return; @@ -2804,7 +2848,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = 1; out: - devm_kfree(&vsi->back->pdev->dev, pcaps); + kfree(pcaps); } /** @@ -2816,6 +2860,7 @@ static int ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_aqc_get_phy_caps_data *pcaps; struct ice_link_status *hw_link_info; struct ice_pf *pf = np->vsi->back; struct ice_dcbx_cfg *dcbx_cfg; @@ -2826,6 +2871,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) u8 aq_failures; bool link_up; int err = 0; + u32 is_an; pi = vsi->port_info; hw_link_info = &pi->phy.link_info; @@ -2840,7 +2886,30 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return -EOPNOTSUPP; } - if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { + /* Get pause param reports configured and negotiated flow control pause + * when ETHTOOL_GLINKSETTINGS is defined. Since ETHTOOL_GLINKSETTINGS is + * defined get pause param pause->autoneg reports SW configured setting, + * so compare pause->autoneg with SW configured to prevent the user from + * using set pause param to chance autoneg. + */ + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return -ENOMEM; + + /* Get current PHY config */ + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + NULL); + if (status) { + kfree(pcaps); + return -EIO; + } + + is_an = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ? + AUTONEG_ENABLE : AUTONEG_DISABLE); + + kfree(pcaps); + + if (pause->autoneg != is_an) { netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n"); return -EOPNOTSUPP; } @@ -2960,7 +3029,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) return -EIO; } - lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -2973,7 +3042,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) indir[i] = (u32)(lut[i]); out: - devm_kfree(&pf->pdev->dev, lut); + kfree(lut); return ret; } @@ -2994,8 +3063,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; u8 *seed = NULL; + dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -3008,8 +3079,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = - devm_kzalloc(&pf->pdev->dev, - ICE_VSIQF_HKEY_ARRAY_SIZE, + devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE, GFP_KERNEL); if (!vsi->rss_hkey_user) return -ENOMEM; @@ -3019,8 +3089,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, } if (!vsi->rss_lut_user) { - vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev, - vsi->rss_table_size, + vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size, GFP_KERNEL); if (!vsi->rss_lut_user) return -ENOMEM; @@ -3043,6 +3112,188 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return 0; } +/** + * ice_get_max_txq - return the maximum number of Tx queues for in a PF + * @pf: PF structure + */ +static int ice_get_max_txq(struct ice_pf *pf) +{ + return min_t(int, num_online_cpus(), + pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues for in a PF + * @pf: PF structure + */ +static int ice_get_max_rxq(struct ice_pf *pf) +{ + return min_t(int, num_online_cpus(), + pf->hw.func_caps.common_cap.num_rxq); +} + +/** + * ice_get_combined_cnt - return the current number of combined channels + * @vsi: PF VSI pointer + * + * Go through all queue vectors and count ones that have both Rx and Tx ring + * attached + */ +static u32 ice_get_combined_cnt(struct ice_vsi *vsi) +{ + u32 combined = 0; + int q_idx; + + ice_for_each_q_vector(vsi, q_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring && q_vector->tx.ring) + combined++; + } + + return combined; +} + +/** + * ice_get_channels - get the current and max supported channels + * @dev: network interface device structure + * @ch: ethtool channel data structure + */ +static void +ice_get_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + /* check to see if VSI is active */ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + /* report maximum channels */ + ch->max_rx = ice_get_max_rxq(pf); + ch->max_tx = ice_get_max_txq(pf); + ch->max_combined = min_t(int, ch->max_rx, ch->max_tx); + + /* report current channels */ + ch->combined_count = ice_get_combined_cnt(vsi); + ch->rx_count = vsi->num_rxq - ch->combined_count; + ch->tx_count = vsi->num_txq - ch->combined_count; +} + +/** + * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size + * @vsi: VSI to reconfigure RSS LUT on + * @req_rss_size: requested range of queue numbers for hashing + * + * Set the VSI's RSS parameters, configure the RSS LUT based on these. + */ +static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + struct ice_hw *hw; + int err = 0; + u8 *lut; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + + if (!req_rss_size) + return -EINVAL; + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* set RSS LUT parameters */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->rss_size = 1; + } else { + struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; + + vsi->rss_size = min_t(int, req_rss_size, + BIT(caps->rss_table_entry_width)); + } + + /* create/set RSS LUT */ + ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); + status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, + vsi->rss_table_size); + if (status) { + dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + err = -EIO; + } + + kfree(lut); + return err; +} + +/** + * ice_set_channels - set the number channels + * @dev: network interface device structure + * @ch: ethtool channel data structure + */ +static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int new_rx = 0, new_tx = 0; + u32 curr_combined; + + /* do not support changing channels in Safe Mode */ + if (ice_is_safe_mode(pf)) { + netdev_err(dev, "Changing channel in Safe Mode is not supported\n"); + return -EOPNOTSUPP; + } + /* do not support changing other_count */ + if (ch->other_count) + return -EINVAL; + + curr_combined = ice_get_combined_cnt(vsi); + + /* these checks are for cases where user didn't specify a particular + * value on cmd line but we get non-zero value anyway via + * get_channels(); look at ethtool.c in ethtool repository (the user + * space part), particularly, do_schannels() routine + */ + if (ch->rx_count == vsi->num_rxq - curr_combined) + ch->rx_count = 0; + if (ch->tx_count == vsi->num_txq - curr_combined) + ch->tx_count = 0; + if (ch->combined_count == curr_combined) + ch->combined_count = 0; + + if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) { + netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n"); + return -EINVAL; + } + + new_rx = ch->combined_count + ch->rx_count; + new_tx = ch->combined_count + ch->tx_count; + + if (new_rx > ice_get_max_rxq(pf)) { + netdev_err(dev, "Maximum allowed Rx channels is %d\n", + ice_get_max_rxq(pf)); + return -EINVAL; + } + if (new_tx > ice_get_max_txq(pf)) { + netdev_err(dev, "Maximum allowed Tx channels is %d\n", + ice_get_max_txq(pf)); + return -EINVAL; + } + + ice_vsi_recfg_qs(vsi, new_rx, new_tx); + + if (new_rx && !netif_is_rxfh_configured(dev)) + return ice_vsi_set_dflt_rss_lut(vsi, new_rx); + + return 0; +} + enum ice_container_type { ICE_RX_CONTAINER, ICE_TX_CONTAINER, @@ -3082,7 +3333,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; break; default: - dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); return -EINVAL; } @@ -3146,12 +3397,6 @@ __ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_get_q_coalesce(vsi, ec, q_num)) return -EINVAL; - if (q_num < vsi->num_txq) - ec->tx_max_coalesced_frames_irq = vsi->work_lmt; - - if (q_num < vsi->num_rxq) - ec->rx_max_coalesced_frames_irq = vsi->work_lmt; - return 0; } @@ -3185,25 +3430,24 @@ static int ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, struct ice_ring_container *rc, struct ice_vsi *vsi) { + const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx"; + u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; if (!rc->ring) return -EINVAL; - itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; - switch (c_type) { case ICE_RX_CONTAINER: if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { - netdev_info(vsi->netdev, - "Invalid value, rx-usecs-high valid values are 0 (disabled), %d-%d\n", - pf->hw.intrl_gran, ICE_MAX_INTRL); + netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n", + c_type_str, pf->hw.intrl_gran, + ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), @@ -3211,60 +3455,57 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, pf->hw.intrl_gran)); } - if (ec->rx_coalesce_usecs != itr_setting && - ec->use_adaptive_rx_coalesce) { - netdev_info(vsi->netdev, - "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); - return -EINVAL; - } - - if (ec->rx_coalesce_usecs > ICE_ITR_MAX) { - netdev_info(vsi->netdev, - "Invalid value, rx-usecs range is 0-%d\n", - ICE_ITR_MAX); - return -EINVAL; - } + use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; + coalesce_usecs = ec->rx_coalesce_usecs; - if (ec->use_adaptive_rx_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; - } else { - rc->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - } break; case ICE_TX_CONTAINER: if (ec->tx_coalesce_usecs_high) { - netdev_info(vsi->netdev, - "setting tx-usecs-high is not supported\n"); + netdev_info(vsi->netdev, "setting %s-usecs-high is not supported\n", + c_type_str); return -EINVAL; } - if (ec->tx_coalesce_usecs != itr_setting && - ec->use_adaptive_tx_coalesce) { - netdev_info(vsi->netdev, - "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); - return -EINVAL; - } - - if (ec->tx_coalesce_usecs > ICE_ITR_MAX) { - netdev_info(vsi->netdev, - "Invalid value, tx-usecs range is 0-%d\n", - ICE_ITR_MAX); - return -EINVAL; - } + use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; + coalesce_usecs = ec->tx_coalesce_usecs; - if (ec->use_adaptive_tx_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; - } else { - rc->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - } break; default: - dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", + c_type); + return -EINVAL; + } + + itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { + netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", + c_type_str, c_type_str); + return -EINVAL; + } + + if (coalesce_usecs > ICE_ITR_MAX) { + netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n", + c_type_str, ICE_ITR_MAX); return -EINVAL; } + /* hardware only supports an ITR granularity of 2us */ + if (coalesce_usecs % 2 != 0) { + netdev_info(vsi->netdev, "Invalid value, %s-usecs must be even\n", + c_type_str); + return -EINVAL; + } + + if (use_adaptive_coalesce) { + rc->itr_setting |= ICE_ITR_DYNAMIC; + } else { + /* store user facing value how it was set */ + rc->itr_setting = coalesce_usecs; + /* set to static and convert to value HW understands */ + rc->target_itr = + ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); + } + return 0; } @@ -3309,6 +3550,53 @@ ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) } /** + * ice_is_coalesce_param_invalid - check for unsupported coalesce parameters + * @netdev: pointer to the netdev associated with this query + * @ec: ethtool structure to fill with driver's coalesce settings + * + * Print netdev info if driver doesn't support one of the parameters + * and return error. When any parameters will be implemented, remove only + * this parameter from param array. + */ +static int +ice_is_coalesce_param_invalid(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct ice_ethtool_not_used { + u32 value; + const char *name; + } param[] = { + {ec->stats_block_coalesce_usecs, "stats-block-usecs"}, + {ec->rate_sample_interval, "sample-interval"}, + {ec->pkt_rate_low, "pkt-rate-low"}, + {ec->pkt_rate_high, "pkt-rate-high"}, + {ec->rx_max_coalesced_frames, "rx-frames"}, + {ec->rx_coalesce_usecs_irq, "rx-usecs-irq"}, + {ec->rx_max_coalesced_frames_irq, "rx-frames-irq"}, + {ec->tx_max_coalesced_frames, "tx-frames"}, + {ec->tx_coalesce_usecs_irq, "tx-usecs-irq"}, + {ec->tx_max_coalesced_frames_irq, "tx-frames-irq"}, + {ec->rx_coalesce_usecs_low, "rx-usecs-low"}, + {ec->rx_max_coalesced_frames_low, "rx-frames-low"}, + {ec->tx_coalesce_usecs_low, "tx-usecs-low"}, + {ec->tx_max_coalesced_frames_low, "tx-frames-low"}, + {ec->rx_max_coalesced_frames_high, "rx-frames-high"}, + {ec->tx_max_coalesced_frames_high, "tx-frames-high"} + }; + int i; + + for (i = 0; i < ARRAY_SIZE(param); i++) { + if (param[i].value) { + netdev_info(netdev, "Setting %s not supported\n", + param[i].name); + return -EINVAL; + } + } + + return 0; +} + +/** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query * @ec: ethtool structure to fill with driver's coalesce settings @@ -3324,24 +3612,30 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + if (ice_is_coalesce_param_invalid(netdev, ec)) + return -EINVAL; + if (q_num < 0) { - int i; + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + /* In some cases if DCB is configured the num_[rx|tx]q + * can be less than vsi->num_q_vectors. This check + * accounts for that so we don't report a false failure + */ + if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq) + goto set_complete; - ice_for_each_q_vector(vsi, i) { - if (ice_set_q_coalesce(vsi, ec, i)) + if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; } - goto set_work_lmt; + goto set_complete; } if (ice_set_q_coalesce(vsi, ec, q_num)) return -EINVAL; -set_work_lmt: - - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_lmt = max(ec->tx_max_coalesced_frames_irq, - ec->rx_max_coalesced_frames_irq); +set_complete: return 0; } @@ -3359,14 +3653,158 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +#define ICE_I2C_EEPROM_DEV_ADDR 0xA0 +#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 +#define ICE_MODULE_TYPE_SFP 0x03 +#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D +#define ICE_MODULE_TYPE_QSFP28 0x11 +#define ICE_MODULE_SFF_ADDR_MODE 0x04 +#define ICE_MODULE_SFF_DIAG_CAPAB 0x40 +#define ICE_MODULE_REVISION_ADDR 0x01 +#define ICE_MODULE_SFF_8472_COMP 0x5E +#define ICE_MODULE_SFF_8472_SWAP 0x5C +#define ICE_MODULE_QSFP_MAX_LEN 640 + +/** + * ice_get_module_info - get SFF module type and revision information + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + */ +static int +ice_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u8 sff8472_comp = 0; + u8 sff8472_swap = 0; + u8 sff8636_rev = 0; + u8 value = 0; + + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00, + 0, &value, 1, 0, NULL); + if (status) + return -EIO; + + switch (value) { + case ICE_MODULE_TYPE_SFP: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_COMP, 0x00, 0, + &sff8472_comp, 1, 0, NULL); + if (status) + return -EIO; + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_SWAP, 0x00, 0, + &sff8472_swap, 1, 0, NULL); + if (status) + return -EIO; + + if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp && + (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } + break; + case ICE_MODULE_TYPE_QSFP_PLUS: + case ICE_MODULE_TYPE_QSFP28: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_REVISION_ADDR, 0x00, 0, + &sff8636_rev, 1, 0, NULL); + if (status) + return -EIO; + /* Check revision compliance */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } + break; + default: + netdev_warn(netdev, "SFF Module Type not recognized.\n"); + return -EINVAL; + } + return 0; +} + +/** + * ice_get_module_eeprom - fill buffer with SFF EEPROM contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + */ +static int +ice_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + u8 addr = ICE_I2C_EEPROM_DEV_ADDR; + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + bool is_sfp = false; + u16 offset = 0; + u8 value = 0; + u8 page = 0; + int i; + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, + &value, 1, 0, NULL); + if (status) + return -EIO; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (value == ICE_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + offset = i + ee->offset; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= ETH_MODULE_SFF_8079_LEN; + addr = ICE_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. */ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + page++; + } + } + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, + &value, 1, 0, NULL); + if (status) + value = 0; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, - .get_drvinfo = ice_get_drvinfo, - .get_regs_len = ice_get_regs_len, - .get_regs = ice_get_regs, - .get_msglevel = ice_get_msglevel, - .set_msglevel = ice_set_msglevel, + .get_drvinfo = ice_get_drvinfo, + .get_regs_len = ice_get_regs_len, + .get_regs = ice_get_regs, + .get_msglevel = ice_get_msglevel, + .set_msglevel = ice_set_msglevel, .self_test = ice_self_test, .get_link = ethtool_op_get_link, .get_eeprom_len = ice_get_eeprom_len, @@ -3380,6 +3818,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .set_priv_flags = ice_set_priv_flags, .get_sset_count = ice_get_sset_count, .get_rxnfc = ice_get_rxnfc, + .set_rxnfc = ice_set_rxnfc, .get_ringparam = ice_get_ringparam, .set_ringparam = ice_set_ringparam, .nway_reset = ice_nway_reset, @@ -3389,14 +3828,46 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_rxfh_indir_size = ice_get_rxfh_indir_size, .get_rxfh = ice_get_rxfh, .set_rxfh = ice_set_rxfh, + .get_channels = ice_get_channels, + .set_channels = ice_set_channels, .get_ts_info = ethtool_op_get_ts_info, - .get_per_queue_coalesce = ice_get_per_q_coalesce, - .set_per_queue_coalesce = ice_set_per_q_coalesce, + .get_per_queue_coalesce = ice_get_per_q_coalesce, + .set_per_queue_coalesce = ice_set_per_q_coalesce, .get_fecparam = ice_get_fecparam, .set_fecparam = ice_set_fecparam, + .get_module_info = ice_get_module_info, + .get_module_eeprom = ice_get_module_eeprom, +}; + +static const struct ethtool_ops ice_ethtool_safe_mode_ops = { + .get_link_ksettings = ice_get_link_ksettings, + .set_link_ksettings = ice_set_link_ksettings, + .get_drvinfo = ice_get_drvinfo, + .get_regs_len = ice_get_regs_len, + .get_regs = ice_get_regs, + .get_msglevel = ice_get_msglevel, + .set_msglevel = ice_set_msglevel, + .get_eeprom_len = ice_get_eeprom_len, + .get_eeprom = ice_get_eeprom, + .get_strings = ice_get_strings, + .get_ethtool_stats = ice_get_ethtool_stats, + .get_sset_count = ice_get_sset_count, + .get_ringparam = ice_get_ringparam, + .set_ringparam = ice_set_ringparam, + .nway_reset = ice_nway_reset, + .get_channels = ice_get_channels, }; /** + * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops + * @netdev: network interface device structure + */ +void ice_set_ethtool_safe_mode_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ice_ethtool_safe_mode_ops; +} + +/** * ice_set_ethtool_ops - setup netdev ethtool ops * @netdev: network interface device structure * diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c new file mode 100644 index 000000000000..99208946224c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -0,0 +1,4110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_flex_pipe.h" +#include "ice_flow.h" + +static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = { + /* SWITCH */ + { + ICE_SID_XLT0_SW, + ICE_SID_XLT_KEY_BUILDER_SW, + ICE_SID_XLT1_SW, + ICE_SID_XLT2_SW, + ICE_SID_PROFID_TCAM_SW, + ICE_SID_PROFID_REDIR_SW, + ICE_SID_FLD_VEC_SW, + ICE_SID_CDID_KEY_BUILDER_SW, + ICE_SID_CDID_REDIR_SW + }, + + /* ACL */ + { + ICE_SID_XLT0_ACL, + ICE_SID_XLT_KEY_BUILDER_ACL, + ICE_SID_XLT1_ACL, + ICE_SID_XLT2_ACL, + ICE_SID_PROFID_TCAM_ACL, + ICE_SID_PROFID_REDIR_ACL, + ICE_SID_FLD_VEC_ACL, + ICE_SID_CDID_KEY_BUILDER_ACL, + ICE_SID_CDID_REDIR_ACL + }, + + /* FD */ + { + ICE_SID_XLT0_FD, + ICE_SID_XLT_KEY_BUILDER_FD, + ICE_SID_XLT1_FD, + ICE_SID_XLT2_FD, + ICE_SID_PROFID_TCAM_FD, + ICE_SID_PROFID_REDIR_FD, + ICE_SID_FLD_VEC_FD, + ICE_SID_CDID_KEY_BUILDER_FD, + ICE_SID_CDID_REDIR_FD + }, + + /* RSS */ + { + ICE_SID_XLT0_RSS, + ICE_SID_XLT_KEY_BUILDER_RSS, + ICE_SID_XLT1_RSS, + ICE_SID_XLT2_RSS, + ICE_SID_PROFID_TCAM_RSS, + ICE_SID_PROFID_REDIR_RSS, + ICE_SID_FLD_VEC_RSS, + ICE_SID_CDID_KEY_BUILDER_RSS, + ICE_SID_CDID_REDIR_RSS + }, + + /* PE */ + { + ICE_SID_XLT0_PE, + ICE_SID_XLT_KEY_BUILDER_PE, + ICE_SID_XLT1_PE, + ICE_SID_XLT2_PE, + ICE_SID_PROFID_TCAM_PE, + ICE_SID_PROFID_REDIR_PE, + ICE_SID_FLD_VEC_PE, + ICE_SID_CDID_KEY_BUILDER_PE, + ICE_SID_CDID_REDIR_PE + } +}; + +/** + * ice_sect_id - returns section ID + * @blk: block type + * @sect: section type + * + * This helper function returns the proper section ID given a block type and a + * section type. + */ +static u32 ice_sect_id(enum ice_block blk, enum ice_sect sect) +{ + return ice_sect_lkup[blk][sect]; +} + +/** + * ice_pkg_val_buf + * @buf: pointer to the ice buffer + * + * This helper function validates a buffer's header. + */ +static struct ice_buf_hdr *ice_pkg_val_buf(struct ice_buf *buf) +{ + struct ice_buf_hdr *hdr; + u16 section_count; + u16 data_end; + + hdr = (struct ice_buf_hdr *)buf->buf; + /* verify data */ + section_count = le16_to_cpu(hdr->section_count); + if (section_count < ICE_MIN_S_COUNT || section_count > ICE_MAX_S_COUNT) + return NULL; + + data_end = le16_to_cpu(hdr->data_end); + if (data_end < ICE_MIN_S_DATA_END || data_end > ICE_MAX_S_DATA_END) + return NULL; + + return hdr; +} + +/** + * ice_find_buf_table + * @ice_seg: pointer to the ice segment + * + * Returns the address of the buffer table within the ice segment. + */ +static struct ice_buf_table *ice_find_buf_table(struct ice_seg *ice_seg) +{ + struct ice_nvm_table *nvms; + + nvms = (struct ice_nvm_table *) + (ice_seg->device_table + + le32_to_cpu(ice_seg->device_table_count)); + + return (__force struct ice_buf_table *) + (nvms->vers + le32_to_cpu(nvms->table_count)); +} + +/** + * ice_pkg_enum_buf + * @ice_seg: pointer to the ice segment (or NULL on subsequent calls) + * @state: pointer to the enum state + * + * This function will enumerate all the buffers in the ice segment. The first + * call is made with the ice_seg parameter non-NULL; on subsequent calls, + * ice_seg is set to NULL which continues the enumeration. When the function + * returns a NULL pointer, then the end of the buffers has been reached, or an + * unexpected value has been detected (for example an invalid section count or + * an invalid buffer end value). + */ +static struct ice_buf_hdr * +ice_pkg_enum_buf(struct ice_seg *ice_seg, struct ice_pkg_enum *state) +{ + if (ice_seg) { + state->buf_table = ice_find_buf_table(ice_seg); + if (!state->buf_table) + return NULL; + + state->buf_idx = 0; + return ice_pkg_val_buf(state->buf_table->buf_array); + } + + if (++state->buf_idx < le32_to_cpu(state->buf_table->buf_count)) + return ice_pkg_val_buf(state->buf_table->buf_array + + state->buf_idx); + else + return NULL; +} + +/** + * ice_pkg_advance_sect + * @ice_seg: pointer to the ice segment (or NULL on subsequent calls) + * @state: pointer to the enum state + * + * This helper function will advance the section within the ice segment, + * also advancing the buffer if needed. + */ +static bool +ice_pkg_advance_sect(struct ice_seg *ice_seg, struct ice_pkg_enum *state) +{ + if (!ice_seg && !state->buf) + return false; + + if (!ice_seg && state->buf) + if (++state->sect_idx < le16_to_cpu(state->buf->section_count)) + return true; + + state->buf = ice_pkg_enum_buf(ice_seg, state); + if (!state->buf) + return false; + + /* start of new buffer, reset section index */ + state->sect_idx = 0; + return true; +} + +/** + * ice_pkg_enum_section + * @ice_seg: pointer to the ice segment (or NULL on subsequent calls) + * @state: pointer to the enum state + * @sect_type: section type to enumerate + * + * This function will enumerate all the sections of a particular type in the + * ice segment. The first call is made with the ice_seg parameter non-NULL; + * on subsequent calls, ice_seg is set to NULL which continues the enumeration. + * When the function returns a NULL pointer, then the end of the matching + * sections has been reached. + */ +static void * +ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, + u32 sect_type) +{ + u16 offset, size; + + if (ice_seg) + state->type = sect_type; + + if (!ice_pkg_advance_sect(ice_seg, state)) + return NULL; + + /* scan for next matching section */ + while (state->buf->section_entry[state->sect_idx].type != + cpu_to_le32(state->type)) + if (!ice_pkg_advance_sect(NULL, state)) + return NULL; + + /* validate section */ + offset = le16_to_cpu(state->buf->section_entry[state->sect_idx].offset); + if (offset < ICE_MIN_S_OFF || offset > ICE_MAX_S_OFF) + return NULL; + + size = le16_to_cpu(state->buf->section_entry[state->sect_idx].size); + if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ) + return NULL; + + /* make sure the section fits in the buffer */ + if (offset + size > ICE_PKG_BUF_SIZE) + return NULL; + + state->sect_type = + le32_to_cpu(state->buf->section_entry[state->sect_idx].type); + + /* calc pointer to this section */ + state->sect = ((u8 *)state->buf) + + le16_to_cpu(state->buf->section_entry[state->sect_idx].offset); + + return state->sect; +} + +/* Key creation */ + +#define ICE_DC_KEY 0x1 /* don't care */ +#define ICE_DC_KEYINV 0x1 +#define ICE_NM_KEY 0x0 /* never match */ +#define ICE_NM_KEYINV 0x0 +#define ICE_0_KEY 0x1 /* match 0 */ +#define ICE_0_KEYINV 0x0 +#define ICE_1_KEY 0x0 /* match 1 */ +#define ICE_1_KEYINV 0x1 + +/** + * ice_gen_key_word - generate 16-bits of a key/mask word + * @val: the value + * @valid: valid bits mask (change only the valid bits) + * @dont_care: don't care mask + * @nvr_mtch: never match mask + * @key: pointer to an array of where the resulting key portion + * @key_inv: pointer to an array of where the resulting key invert portion + * + * This function generates 16-bits from a 8-bit value, an 8-bit don't care mask + * and an 8-bit never match mask. The 16-bits of output are divided into 8 bits + * of key and 8 bits of key invert. + * + * '0' = b01, always match a 0 bit + * '1' = b10, always match a 1 bit + * '?' = b11, don't care bit (always matches) + * '~' = b00, never match bit + * + * Input: + * val: b0 1 0 1 0 1 + * dont_care: b0 0 1 1 0 0 + * never_mtch: b0 0 0 0 1 1 + * ------------------------------ + * Result: key: b01 10 11 11 00 00 + */ +static enum ice_status +ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key, + u8 *key_inv) +{ + u8 in_key = *key, in_key_inv = *key_inv; + u8 i; + + /* 'dont_care' and 'nvr_mtch' masks cannot overlap */ + if ((dont_care ^ nvr_mtch) != (dont_care | nvr_mtch)) + return ICE_ERR_CFG; + + *key = 0; + *key_inv = 0; + + /* encode the 8 bits into 8-bit key and 8-bit key invert */ + for (i = 0; i < 8; i++) { + *key >>= 1; + *key_inv >>= 1; + + if (!(valid & 0x1)) { /* change only valid bits */ + *key |= (in_key & 0x1) << 7; + *key_inv |= (in_key_inv & 0x1) << 7; + } else if (dont_care & 0x1) { /* don't care bit */ + *key |= ICE_DC_KEY << 7; + *key_inv |= ICE_DC_KEYINV << 7; + } else if (nvr_mtch & 0x1) { /* never match bit */ + *key |= ICE_NM_KEY << 7; + *key_inv |= ICE_NM_KEYINV << 7; + } else if (val & 0x01) { /* exact 1 match */ + *key |= ICE_1_KEY << 7; + *key_inv |= ICE_1_KEYINV << 7; + } else { /* exact 0 match */ + *key |= ICE_0_KEY << 7; + *key_inv |= ICE_0_KEYINV << 7; + } + + dont_care >>= 1; + nvr_mtch >>= 1; + valid >>= 1; + val >>= 1; + in_key >>= 1; + in_key_inv >>= 1; + } + + return 0; +} + +/** + * ice_bits_max_set - determine if the number of bits set is within a maximum + * @mask: pointer to the byte array which is the mask + * @size: the number of bytes in the mask + * @max: the max number of set bits + * + * This function determines if there are at most 'max' number of bits set in an + * array. Returns true if the number for bits set is <= max or will return false + * otherwise. + */ +static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max) +{ + u16 count = 0; + u16 i; + + /* check each byte */ + for (i = 0; i < size; i++) { + /* if 0, go to next byte */ + if (!mask[i]) + continue; + + /* We know there is at least one set bit in this byte because of + * the above check; if we already have found 'max' number of + * bits set, then we can return failure now. + */ + if (count == max) + return false; + + /* count the bits in this byte, checking threshold */ + count += hweight8(mask[i]); + if (count > max) + return false; + } + + return true; +} + +/** + * ice_set_key - generate a variable sized key with multiples of 16-bits + * @key: pointer to where the key will be stored + * @size: the size of the complete key in bytes (must be even) + * @val: array of 8-bit values that makes up the value portion of the key + * @upd: array of 8-bit masks that determine what key portion to update + * @dc: array of 8-bit masks that make up the don't care mask + * @nm: array of 8-bit masks that make up the never match mask + * @off: the offset of the first byte in the key to update + * @len: the number of bytes in the key update + * + * This function generates a key from a value, a don't care mask and a never + * match mask. + * upd, dc, and nm are optional parameters, and can be NULL: + * upd == NULL --> udp mask is all 1's (update all bits) + * dc == NULL --> dc mask is all 0's (no don't care bits) + * nm == NULL --> nm mask is all 0's (no never match bits) + */ +static enum ice_status +ice_set_key(u8 *key, u16 size, u8 *val, u8 *upd, u8 *dc, u8 *nm, u16 off, + u16 len) +{ + u16 half_size; + u16 i; + + /* size must be a multiple of 2 bytes. */ + if (size % 2) + return ICE_ERR_CFG; + + half_size = size / 2; + if (off + len > half_size) + return ICE_ERR_CFG; + + /* Make sure at most one bit is set in the never match mask. Having more + * than one never match mask bit set will cause HW to consume excessive + * power otherwise; this is a power management efficiency check. + */ +#define ICE_NVR_MTCH_BITS_MAX 1 + if (nm && !ice_bits_max_set(nm, len, ICE_NVR_MTCH_BITS_MAX)) + return ICE_ERR_CFG; + + for (i = 0; i < len; i++) + if (ice_gen_key_word(val[i], upd ? upd[i] : 0xff, + dc ? dc[i] : 0, nm ? nm[i] : 0, + key + off + i, key + half_size + off + i)) + return ICE_ERR_CFG; + + return 0; +} + +/** + * ice_acquire_global_cfg_lock + * @hw: pointer to the HW structure + * @access: access type (read or write) + * + * This function will request ownership of the global config lock for reading + * or writing of the package. When attempting to obtain write access, the + * caller must check for the following two return values: + * + * ICE_SUCCESS - Means the caller has acquired the global config lock + * and can perform writing of the package. + * ICE_ERR_AQ_NO_WORK - Indicates another driver has already written the + * package or has found that no update was necessary; in + * this case, the caller can just skip performing any + * update of the package. + */ +static enum ice_status +ice_acquire_global_cfg_lock(struct ice_hw *hw, + enum ice_aq_res_access_type access) +{ + enum ice_status status; + + status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, access, + ICE_GLOBAL_CFG_LOCK_TIMEOUT); + + if (!status) + mutex_lock(&ice_global_cfg_lock_sw); + else if (status == ICE_ERR_AQ_NO_WORK) + ice_debug(hw, ICE_DBG_PKG, + "Global config lock: No work to do\n"); + + return status; +} + +/** + * ice_release_global_cfg_lock + * @hw: pointer to the HW structure + * + * This function will release the global config lock. + */ +static void ice_release_global_cfg_lock(struct ice_hw *hw) +{ + mutex_unlock(&ice_global_cfg_lock_sw); + ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID); +} + +/** + * ice_acquire_change_lock + * @hw: pointer to the HW structure + * @access: access type (read or write) + * + * This function will request ownership of the change lock. + */ +static enum ice_status +ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) +{ + return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access, + ICE_CHANGE_LOCK_TIMEOUT); +} + +/** + * ice_release_change_lock + * @hw: pointer to the HW structure + * + * This function will release the change lock using the proper Admin Command. + */ +static void ice_release_change_lock(struct ice_hw *hw) +{ + ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID); +} + +/** + * ice_aq_download_pkg + * @hw: pointer to the hardware structure + * @pkg_buf: the package buffer to transfer + * @buf_size: the size of the package buffer + * @last_buf: last buffer indicator + * @error_offset: returns error offset + * @error_info: returns error information + * @cd: pointer to command details structure or NULL + * + * Download Package (0x0C40) + */ +static enum ice_status +ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, + u16 buf_size, bool last_buf, u32 *error_offset, + u32 *error_info, struct ice_sq_cd *cd) +{ + struct ice_aqc_download_pkg *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (error_offset) + *error_offset = 0; + if (error_info) + *error_info = 0; + + cmd = &desc.params.download_pkg; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_download_pkg); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + if (last_buf) + cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF; + + status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd); + if (status == ICE_ERR_AQ_ERROR) { + /* Read error from buffer only when the FW returned an error */ + struct ice_aqc_download_pkg_resp *resp; + + resp = (struct ice_aqc_download_pkg_resp *)pkg_buf; + if (error_offset) + *error_offset = le32_to_cpu(resp->error_offset); + if (error_info) + *error_info = le32_to_cpu(resp->error_info); + } + + return status; +} + +/** + * ice_aq_update_pkg + * @hw: pointer to the hardware structure + * @pkg_buf: the package cmd buffer + * @buf_size: the size of the package cmd buffer + * @last_buf: last buffer indicator + * @error_offset: returns error offset + * @error_info: returns error information + * @cd: pointer to command details structure or NULL + * + * Update Package (0x0C42) + */ +static enum ice_status +ice_aq_update_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, u16 buf_size, + bool last_buf, u32 *error_offset, u32 *error_info, + struct ice_sq_cd *cd) +{ + struct ice_aqc_download_pkg *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (error_offset) + *error_offset = 0; + if (error_info) + *error_info = 0; + + cmd = &desc.params.download_pkg; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_pkg); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + if (last_buf) + cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF; + + status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd); + if (status == ICE_ERR_AQ_ERROR) { + /* Read error from buffer only when the FW returned an error */ + struct ice_aqc_download_pkg_resp *resp; + + resp = (struct ice_aqc_download_pkg_resp *)pkg_buf; + if (error_offset) + *error_offset = le32_to_cpu(resp->error_offset); + if (error_info) + *error_info = le32_to_cpu(resp->error_info); + } + + return status; +} + +/** + * ice_find_seg_in_pkg + * @hw: pointer to the hardware structure + * @seg_type: the segment type to search for (i.e., SEGMENT_TYPE_CPK) + * @pkg_hdr: pointer to the package header to be searched + * + * This function searches a package file for a particular segment type. On + * success it returns a pointer to the segment header, otherwise it will + * return NULL. + */ +static struct ice_generic_seg_hdr * +ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type, + struct ice_pkg_hdr *pkg_hdr) +{ + u32 i; + + ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n", + pkg_hdr->format_ver.major, pkg_hdr->format_ver.minor, + pkg_hdr->format_ver.update, pkg_hdr->format_ver.draft); + + /* Search all package segments for the requested segment type */ + for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) { + struct ice_generic_seg_hdr *seg; + + seg = (struct ice_generic_seg_hdr *) + ((u8 *)pkg_hdr + le32_to_cpu(pkg_hdr->seg_offset[i])); + + if (le32_to_cpu(seg->seg_type) == seg_type) + return seg; + } + + return NULL; +} + +/** + * ice_update_pkg + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains change lock and updates package. + */ +static enum ice_status +ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + enum ice_status status; + u32 offset, info, i; + + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + return status; + + for (i = 0; i < count; i++) { + struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i); + bool last = ((i + 1) == count); + + status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end), + last, &offset, &info, NULL); + + if (status) { + ice_debug(hw, ICE_DBG_PKG, + "Update pkg failed: err %d off %d inf %d\n", + status, offset, info); + break; + } + } + + ice_release_change_lock(hw); + + return status; +} + +/** + * ice_dwnld_cfg_bufs + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains global config lock and downloads the package configuration buffers + * to the firmware. Metadata buffers are skipped, and the first metadata buffer + * found indicates that the rest of the buffers are all metadata buffers. + */ +static enum ice_status +ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + enum ice_status status; + struct ice_buf_hdr *bh; + u32 offset, info, i; + + if (!bufs || !count) + return ICE_ERR_PARAM; + + /* If the first buffer's first section has its metadata bit set + * then there are no buffers to be downloaded, and the operation is + * considered a success. + */ + bh = (struct ice_buf_hdr *)bufs; + if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF) + return 0; + + /* reset pkg_dwnld_status in case this function is called in the + * reset/rebuild flow + */ + hw->pkg_dwnld_status = ICE_AQ_RC_OK; + + status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE); + if (status) { + if (status == ICE_ERR_AQ_NO_WORK) + hw->pkg_dwnld_status = ICE_AQ_RC_EEXIST; + else + hw->pkg_dwnld_status = hw->adminq.sq_last_status; + return status; + } + + for (i = 0; i < count; i++) { + bool last = ((i + 1) == count); + + if (!last) { + /* check next buffer for metadata flag */ + bh = (struct ice_buf_hdr *)(bufs + i + 1); + + /* A set metadata flag in the next buffer will signal + * that the current buffer will be the last buffer + * downloaded + */ + if (le16_to_cpu(bh->section_count)) + if (le32_to_cpu(bh->section_entry[0].type) & + ICE_METADATA_BUF) + last = true; + } + + bh = (struct ice_buf_hdr *)(bufs + i); + + status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last, + &offset, &info, NULL); + + /* Save AQ status from download package */ + hw->pkg_dwnld_status = hw->adminq.sq_last_status; + if (status) { + ice_debug(hw, ICE_DBG_PKG, + "Pkg download failed: err %d off %d inf %d\n", + status, offset, info); + + break; + } + + if (last) + break; + } + + ice_release_global_cfg_lock(hw); + + return status; +} + +/** + * ice_aq_get_pkg_info_list + * @hw: pointer to the hardware structure + * @pkg_info: the buffer which will receive the information list + * @buf_size: the size of the pkg_info information buffer + * @cd: pointer to command details structure or NULL + * + * Get Package Info List (0x0C43) + */ +static enum ice_status +ice_aq_get_pkg_info_list(struct ice_hw *hw, + struct ice_aqc_get_pkg_info_resp *pkg_info, + u16 buf_size, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_pkg_info_list); + + return ice_aq_send_cmd(hw, &desc, pkg_info, buf_size, cd); +} + +/** + * ice_download_pkg + * @hw: pointer to the hardware structure + * @ice_seg: pointer to the segment of the package to be downloaded + * + * Handles the download of a complete package. + */ +static enum ice_status +ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg) +{ + struct ice_buf_table *ice_buf_tbl; + + ice_debug(hw, ICE_DBG_PKG, "Segment version: %d.%d.%d.%d\n", + ice_seg->hdr.seg_ver.major, ice_seg->hdr.seg_ver.minor, + ice_seg->hdr.seg_ver.update, ice_seg->hdr.seg_ver.draft); + + ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n", + le32_to_cpu(ice_seg->hdr.seg_type), + le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_name); + + ice_buf_tbl = ice_find_buf_table(ice_seg); + + ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n", + le32_to_cpu(ice_buf_tbl->buf_count)); + + return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array, + le32_to_cpu(ice_buf_tbl->buf_count)); +} + +/** + * ice_init_pkg_info + * @hw: pointer to the hardware structure + * @pkg_hdr: pointer to the driver's package hdr + * + * Saves off the package details into the HW structure. + */ +static enum ice_status +ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) +{ + struct ice_global_metadata_seg *meta_seg; + struct ice_generic_seg_hdr *seg_hdr; + + if (!pkg_hdr) + return ICE_ERR_PARAM; + + meta_seg = (struct ice_global_metadata_seg *) + ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr); + if (meta_seg) { + hw->pkg_ver = meta_seg->pkg_ver; + memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name)); + + ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n", + meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor, + meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft, + meta_seg->pkg_name); + } else { + ice_debug(hw, ICE_DBG_INIT, + "Did not find metadata segment in driver package\n"); + return ICE_ERR_CFG; + } + + seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr); + if (seg_hdr) { + hw->ice_pkg_ver = seg_hdr->seg_ver; + memcpy(hw->ice_pkg_name, seg_hdr->seg_name, + sizeof(hw->ice_pkg_name)); + + ice_debug(hw, ICE_DBG_PKG, "Ice Pkg: %d.%d.%d.%d, %s\n", + seg_hdr->seg_ver.major, seg_hdr->seg_ver.minor, + seg_hdr->seg_ver.update, seg_hdr->seg_ver.draft, + seg_hdr->seg_name); + } else { + ice_debug(hw, ICE_DBG_INIT, + "Did not find ice segment in driver package\n"); + return ICE_ERR_CFG; + } + + return 0; +} + +/** + * ice_get_pkg_info + * @hw: pointer to the hardware structure + * + * Store details of the package currently loaded in HW into the HW structure. + */ +static enum ice_status ice_get_pkg_info(struct ice_hw *hw) +{ + struct ice_aqc_get_pkg_info_resp *pkg_info; + enum ice_status status; + u16 size; + u32 i; + + size = sizeof(*pkg_info) + (sizeof(pkg_info->pkg_info[0]) * + (ICE_PKG_CNT - 1)); + pkg_info = kzalloc(size, GFP_KERNEL); + if (!pkg_info) + return ICE_ERR_NO_MEMORY; + + status = ice_aq_get_pkg_info_list(hw, pkg_info, size, NULL); + if (status) + goto init_pkg_free_alloc; + + for (i = 0; i < le32_to_cpu(pkg_info->count); i++) { +#define ICE_PKG_FLAG_COUNT 4 + char flags[ICE_PKG_FLAG_COUNT + 1] = { 0 }; + u8 place = 0; + + if (pkg_info->pkg_info[i].is_active) { + flags[place++] = 'A'; + hw->active_pkg_ver = pkg_info->pkg_info[i].ver; + memcpy(hw->active_pkg_name, + pkg_info->pkg_info[i].name, + sizeof(hw->active_pkg_name)); + hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm; + } + if (pkg_info->pkg_info[i].is_active_at_boot) + flags[place++] = 'B'; + if (pkg_info->pkg_info[i].is_modified) + flags[place++] = 'M'; + if (pkg_info->pkg_info[i].is_in_nvm) + flags[place++] = 'N'; + + ice_debug(hw, ICE_DBG_PKG, "Pkg[%d]: %d.%d.%d.%d,%s,%s\n", + i, pkg_info->pkg_info[i].ver.major, + pkg_info->pkg_info[i].ver.minor, + pkg_info->pkg_info[i].ver.update, + pkg_info->pkg_info[i].ver.draft, + pkg_info->pkg_info[i].name, flags); + } + +init_pkg_free_alloc: + kfree(pkg_info); + + return status; +} + +/** + * ice_verify_pkg - verify package + * @pkg: pointer to the package buffer + * @len: size of the package buffer + * + * Verifies various attributes of the package file, including length, format + * version, and the requirement of at least one segment. + */ +static enum ice_status ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len) +{ + u32 seg_count; + u32 i; + + if (len < sizeof(*pkg)) + return ICE_ERR_BUF_TOO_SHORT; + + if (pkg->format_ver.major != ICE_PKG_FMT_VER_MAJ || + pkg->format_ver.minor != ICE_PKG_FMT_VER_MNR || + pkg->format_ver.update != ICE_PKG_FMT_VER_UPD || + pkg->format_ver.draft != ICE_PKG_FMT_VER_DFT) + return ICE_ERR_CFG; + + /* pkg must have at least one segment */ + seg_count = le32_to_cpu(pkg->seg_count); + if (seg_count < 1) + return ICE_ERR_CFG; + + /* make sure segment array fits in package length */ + if (len < sizeof(*pkg) + ((seg_count - 1) * sizeof(pkg->seg_offset))) + return ICE_ERR_BUF_TOO_SHORT; + + /* all segments must fit within length */ + for (i = 0; i < seg_count; i++) { + u32 off = le32_to_cpu(pkg->seg_offset[i]); + struct ice_generic_seg_hdr *seg; + + /* segment header must fit */ + if (len < off + sizeof(*seg)) + return ICE_ERR_BUF_TOO_SHORT; + + seg = (struct ice_generic_seg_hdr *)((u8 *)pkg + off); + + /* segment body must fit */ + if (len < off + le32_to_cpu(seg->seg_size)) + return ICE_ERR_BUF_TOO_SHORT; + } + + return 0; +} + +/** + * ice_free_seg - free package segment pointer + * @hw: pointer to the hardware structure + * + * Frees the package segment pointer in the proper manner, depending on if the + * segment was allocated or just the passed in pointer was stored. + */ +void ice_free_seg(struct ice_hw *hw) +{ + if (hw->pkg_copy) { + devm_kfree(ice_hw_to_dev(hw), hw->pkg_copy); + hw->pkg_copy = NULL; + hw->pkg_size = 0; + } + hw->seg = NULL; +} + +/** + * ice_init_pkg_regs - initialize additional package registers + * @hw: pointer to the hardware structure + */ +static void ice_init_pkg_regs(struct ice_hw *hw) +{ +#define ICE_SW_BLK_INP_MASK_L 0xFFFFFFFF +#define ICE_SW_BLK_INP_MASK_H 0x0000FFFF +#define ICE_SW_BLK_IDX 0 + + /* setup Switch block input mask, which is 48-bits in two parts */ + wr32(hw, GL_PREEXT_L2_PMASK0(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_L); + wr32(hw, GL_PREEXT_L2_PMASK1(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_H); +} + +/** + * ice_chk_pkg_version - check package version for compatibility with driver + * @pkg_ver: pointer to a version structure to check + * + * Check to make sure that the package about to be downloaded is compatible with + * the driver. To be compatible, the major and minor components of the package + * version must match our ICE_PKG_SUPP_VER_MAJ and ICE_PKG_SUPP_VER_MNR + * definitions. + */ +static enum ice_status ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver) +{ + if (pkg_ver->major != ICE_PKG_SUPP_VER_MAJ || + pkg_ver->minor != ICE_PKG_SUPP_VER_MNR) + return ICE_ERR_NOT_SUPPORTED; + + return 0; +} + +/** + * ice_init_pkg - initialize/download package + * @hw: pointer to the hardware structure + * @buf: pointer to the package buffer + * @len: size of the package buffer + * + * This function initializes a package. The package contains HW tables + * required to do packet processing. First, the function extracts package + * information such as version. Then it finds the ice configuration segment + * within the package; this function then saves a copy of the segment pointer + * within the supplied package buffer. Next, the function will cache any hints + * from the package, followed by downloading the package itself. Note, that if + * a previous PF driver has already downloaded the package successfully, then + * the current driver will not have to download the package again. + * + * The local package contents will be used to query default behavior and to + * update specific sections of the HW's version of the package (e.g. to update + * the parse graph to understand new protocols). + * + * This function stores a pointer to the package buffer memory, and it is + * expected that the supplied buffer will not be freed immediately. If the + * package buffer needs to be freed, such as when read from a file, use + * ice_copy_and_init_pkg() instead of directly calling ice_init_pkg() in this + * case. + */ +enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) +{ + struct ice_pkg_hdr *pkg; + enum ice_status status; + struct ice_seg *seg; + + if (!buf || !len) + return ICE_ERR_PARAM; + + pkg = (struct ice_pkg_hdr *)buf; + status = ice_verify_pkg(pkg, len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "failed to verify pkg (err: %d)\n", + status); + return status; + } + + /* initialize package info */ + status = ice_init_pkg_info(hw, pkg); + if (status) + return status; + + /* before downloading the package, check package version for + * compatibility with driver + */ + status = ice_chk_pkg_version(&hw->pkg_ver); + if (status) + return status; + + /* find segment in given package */ + seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg); + if (!seg) { + ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n"); + return ICE_ERR_CFG; + } + + /* download package */ + status = ice_download_pkg(hw, seg); + if (status == ICE_ERR_AQ_NO_WORK) { + ice_debug(hw, ICE_DBG_INIT, + "package previously loaded - no work.\n"); + status = 0; + } + + /* Get information on the package currently loaded in HW, then make sure + * the driver is compatible with this version. + */ + if (!status) { + status = ice_get_pkg_info(hw); + if (!status) + status = ice_chk_pkg_version(&hw->active_pkg_ver); + } + + if (!status) { + hw->seg = seg; + /* on successful package download update other required + * registers to support the package and fill HW tables + * with package content. + */ + ice_init_pkg_regs(hw); + ice_fill_blk_tbls(hw); + } else { + ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n", + status); + } + + return status; +} + +/** + * ice_copy_and_init_pkg - initialize/download a copy of the package + * @hw: pointer to the hardware structure + * @buf: pointer to the package buffer + * @len: size of the package buffer + * + * This function copies the package buffer, and then calls ice_init_pkg() to + * initialize the copied package contents. + * + * The copying is necessary if the package buffer supplied is constant, or if + * the memory may disappear shortly after calling this function. + * + * If the package buffer resides in the data segment and can be modified, the + * caller is free to use ice_init_pkg() instead of ice_copy_and_init_pkg(). + * + * However, if the package buffer needs to be copied first, such as when being + * read from a file, the caller should use ice_copy_and_init_pkg(). + * + * This function will first copy the package buffer, before calling + * ice_init_pkg(). The caller is free to immediately destroy the original + * package buffer, as the new copy will be managed by this function and + * related routines. + */ +enum ice_status ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len) +{ + enum ice_status status; + u8 *buf_copy; + + if (!buf || !len) + return ICE_ERR_PARAM; + + buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL); + + status = ice_init_pkg(hw, buf_copy, len); + if (status) { + /* Free the copy, since we failed to initialize the package */ + devm_kfree(ice_hw_to_dev(hw), buf_copy); + } else { + /* Track the copied pkg so we can free it later */ + hw->pkg_copy = buf_copy; + hw->pkg_size = len; + } + + return status; +} + +/** + * ice_pkg_buf_alloc + * @hw: pointer to the HW structure + * + * Allocates a package buffer and returns a pointer to the buffer header. + * Note: all package contents must be in Little Endian form. + */ +static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw) +{ + struct ice_buf_build *bld; + struct ice_buf_hdr *buf; + + bld = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*bld), GFP_KERNEL); + if (!bld) + return NULL; + + buf = (struct ice_buf_hdr *)bld; + buf->data_end = cpu_to_le16(offsetof(struct ice_buf_hdr, + section_entry)); + return bld; +} + +/** + * ice_pkg_buf_free + * @hw: pointer to the HW structure + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Frees a package buffer + */ +static void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld) +{ + devm_kfree(ice_hw_to_dev(hw), bld); +} + +/** + * ice_pkg_buf_reserve_section + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * @count: the number of sections to reserve + * + * Reserves one or more section table entries in a package buffer. This routine + * can be called multiple times as long as they are made before calling + * ice_pkg_buf_alloc_section(). Once ice_pkg_buf_alloc_section() + * is called once, the number of sections that can be allocated will not be able + * to be increased; not using all reserved sections is fine, but this will + * result in some wasted space in the buffer. + * Note: all package contents must be in Little Endian form. + */ +static enum ice_status +ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count) +{ + struct ice_buf_hdr *buf; + u16 section_count; + u16 data_end; + + if (!bld) + return ICE_ERR_PARAM; + + buf = (struct ice_buf_hdr *)&bld->buf; + + /* already an active section, can't increase table size */ + section_count = le16_to_cpu(buf->section_count); + if (section_count > 0) + return ICE_ERR_CFG; + + if (bld->reserved_section_table_entries + count > ICE_MAX_S_COUNT) + return ICE_ERR_CFG; + bld->reserved_section_table_entries += count; + + data_end = le16_to_cpu(buf->data_end) + + (count * sizeof(buf->section_entry[0])); + buf->data_end = cpu_to_le16(data_end); + + return 0; +} + +/** + * ice_pkg_buf_alloc_section + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * @type: the section type value + * @size: the size of the section to reserve (in bytes) + * + * Reserves memory in the buffer for a section's content and updates the + * buffers' status accordingly. This routine returns a pointer to the first + * byte of the section start within the buffer, which is used to fill in the + * section contents. + * Note: all package contents must be in Little Endian form. + */ +static void * +ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size) +{ + struct ice_buf_hdr *buf; + u16 sect_count; + u16 data_end; + + if (!bld || !type || !size) + return NULL; + + buf = (struct ice_buf_hdr *)&bld->buf; + + /* check for enough space left in buffer */ + data_end = le16_to_cpu(buf->data_end); + + /* section start must align on 4 byte boundary */ + data_end = ALIGN(data_end, 4); + + if ((data_end + size) > ICE_MAX_S_DATA_END) + return NULL; + + /* check for more available section table entries */ + sect_count = le16_to_cpu(buf->section_count); + if (sect_count < bld->reserved_section_table_entries) { + void *section_ptr = ((u8 *)buf) + data_end; + + buf->section_entry[sect_count].offset = cpu_to_le16(data_end); + buf->section_entry[sect_count].size = cpu_to_le16(size); + buf->section_entry[sect_count].type = cpu_to_le32(type); + + data_end += size; + buf->data_end = cpu_to_le16(data_end); + + buf->section_count = cpu_to_le16(sect_count + 1); + return section_ptr; + } + + /* no free section table entries */ + return NULL; +} + +/** + * ice_pkg_buf_get_active_sections + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Returns the number of active sections. Before using the package buffer + * in an update package command, the caller should make sure that there is at + * least one active section - otherwise, the buffer is not legal and should + * not be used. + * Note: all package contents must be in Little Endian form. + */ +static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld) +{ + struct ice_buf_hdr *buf; + + if (!bld) + return 0; + + buf = (struct ice_buf_hdr *)&bld->buf; + return le16_to_cpu(buf->section_count); +} + +/** + * ice_pkg_buf + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Return a pointer to the buffer's header + */ +static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) +{ + if (!bld) + return NULL; + + return &bld->buf; +} + +/* PTG Management */ + +/** + * ice_ptg_find_ptype - Search for packet type group using packet type (ptype) + * @hw: pointer to the hardware structure + * @blk: HW block + * @ptype: the ptype to search for + * @ptg: pointer to variable that receives the PTG + * + * This function will search the PTGs for a particular ptype, returning the + * PTG ID that contains it through the PTG parameter, with the value of + * ICE_DEFAULT_PTG (0) meaning it is part the default PTG. + */ +static enum ice_status +ice_ptg_find_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 *ptg) +{ + if (ptype >= ICE_XLT1_CNT || !ptg) + return ICE_ERR_PARAM; + + *ptg = hw->blk[blk].xlt1.ptypes[ptype].ptg; + return 0; +} + +/** + * ice_ptg_alloc_val - Allocates a new packet type group ID by value + * @hw: pointer to the hardware structure + * @blk: HW block + * @ptg: the PTG to allocate + * + * This function allocates a given packet type group ID specified by the PTG + * parameter. + */ +static void ice_ptg_alloc_val(struct ice_hw *hw, enum ice_block blk, u8 ptg) +{ + hw->blk[blk].xlt1.ptg_tbl[ptg].in_use = true; +} + +/** + * ice_ptg_remove_ptype - Removes ptype from a particular packet type group + * @hw: pointer to the hardware structure + * @blk: HW block + * @ptype: the ptype to remove + * @ptg: the PTG to remove the ptype from + * + * This function will remove the ptype from the specific PTG, and move it to + * the default PTG (ICE_DEFAULT_PTG). + */ +static enum ice_status +ice_ptg_remove_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg) +{ + struct ice_ptg_ptype **ch; + struct ice_ptg_ptype *p; + + if (ptype > ICE_XLT1_CNT - 1) + return ICE_ERR_PARAM; + + if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + /* Should not happen if .in_use is set, bad config */ + if (!hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype) + return ICE_ERR_CFG; + + /* find the ptype within this PTG, and bypass the link over it */ + p = hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype; + ch = &hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype; + while (p) { + if (ptype == (p - hw->blk[blk].xlt1.ptypes)) { + *ch = p->next_ptype; + break; + } + + ch = &p->next_ptype; + p = p->next_ptype; + } + + hw->blk[blk].xlt1.ptypes[ptype].ptg = ICE_DEFAULT_PTG; + hw->blk[blk].xlt1.ptypes[ptype].next_ptype = NULL; + + return 0; +} + +/** + * ice_ptg_add_mv_ptype - Adds/moves ptype to a particular packet type group + * @hw: pointer to the hardware structure + * @blk: HW block + * @ptype: the ptype to add or move + * @ptg: the PTG to add or move the ptype to + * + * This function will either add or move a ptype to a particular PTG depending + * on if the ptype is already part of another group. Note that using a + * a destination PTG ID of ICE_DEFAULT_PTG (0) will move the ptype to the + * default PTG. + */ +static enum ice_status +ice_ptg_add_mv_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg) +{ + enum ice_status status; + u8 original_ptg; + + if (ptype > ICE_XLT1_CNT - 1) + return ICE_ERR_PARAM; + + if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use && ptg != ICE_DEFAULT_PTG) + return ICE_ERR_DOES_NOT_EXIST; + + status = ice_ptg_find_ptype(hw, blk, ptype, &original_ptg); + if (status) + return status; + + /* Is ptype already in the correct PTG? */ + if (original_ptg == ptg) + return 0; + + /* Remove from original PTG and move back to the default PTG */ + if (original_ptg != ICE_DEFAULT_PTG) + ice_ptg_remove_ptype(hw, blk, ptype, original_ptg); + + /* Moving to default PTG? Then we're done with this request */ + if (ptg == ICE_DEFAULT_PTG) + return 0; + + /* Add ptype to PTG at beginning of list */ + hw->blk[blk].xlt1.ptypes[ptype].next_ptype = + hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype; + hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype = + &hw->blk[blk].xlt1.ptypes[ptype]; + + hw->blk[blk].xlt1.ptypes[ptype].ptg = ptg; + hw->blk[blk].xlt1.t[ptype] = ptg; + + return 0; +} + +/* Block / table size info */ +struct ice_blk_size_details { + u16 xlt1; /* # XLT1 entries */ + u16 xlt2; /* # XLT2 entries */ + u16 prof_tcam; /* # profile ID TCAM entries */ + u16 prof_id; /* # profile IDs */ + u8 prof_cdid_bits; /* # CDID one-hot bits used in key */ + u16 prof_redir; /* # profile redirection entries */ + u16 es; /* # extraction sequence entries */ + u16 fvw; /* # field vector words */ + u8 overwrite; /* overwrite existing entries allowed */ + u8 reverse; /* reverse FV order */ +}; + +static const struct ice_blk_size_details blk_sizes[ICE_BLK_COUNT] = { + /** + * Table Definitions + * XLT1 - Number of entries in XLT1 table + * XLT2 - Number of entries in XLT2 table + * TCAM - Number of entries Profile ID TCAM table + * CDID - Control Domain ID of the hardware block + * PRED - Number of entries in the Profile Redirection Table + * FV - Number of entries in the Field Vector + * FVW - Width (in WORDs) of the Field Vector + * OVR - Overwrite existing table entries + * REV - Reverse FV + */ + /* XLT1 , XLT2 ,TCAM, PID,CDID,PRED, FV, FVW */ + /* Overwrite , Reverse FV */ + /* SW */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 256, 0, 256, 256, 48, + false, false }, + /* ACL */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 32, + false, false }, + /* FD */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 24, + false, true }, + /* RSS */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128, 0, 128, 128, 24, + true, true }, + /* PE */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 64, 32, 0, 32, 32, 24, + false, false }, +}; + +enum ice_sid_all { + ICE_SID_XLT1_OFF = 0, + ICE_SID_XLT2_OFF, + ICE_SID_PR_OFF, + ICE_SID_PR_REDIR_OFF, + ICE_SID_ES_OFF, + ICE_SID_OFF_COUNT, +}; + +/* Characteristic handling */ + +/** + * ice_match_prop_lst - determine if properties of two lists match + * @list1: first properties list + * @list2: second properties list + * + * Count, cookies and the order must match in order to be considered equivalent. + */ +static bool +ice_match_prop_lst(struct list_head *list1, struct list_head *list2) +{ + struct ice_vsig_prof *tmp1; + struct ice_vsig_prof *tmp2; + u16 chk_count = 0; + u16 count = 0; + + /* compare counts */ + list_for_each_entry(tmp1, list1, list) + count++; + list_for_each_entry(tmp2, list2, list) + chk_count++; + if (!count || count != chk_count) + return false; + + tmp1 = list_first_entry(list1, struct ice_vsig_prof, list); + tmp2 = list_first_entry(list2, struct ice_vsig_prof, list); + + /* profile cookies must compare, and in the exact same order to take + * into account priority + */ + while (count--) { + if (tmp2->profile_cookie != tmp1->profile_cookie) + return false; + + tmp1 = list_next_entry(tmp1, list); + tmp2 = list_next_entry(tmp2, list); + } + + return true; +} + +/* VSIG Management */ + +/** + * ice_vsig_find_vsi - find a VSIG that contains a specified VSI + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsi: VSI of interest + * @vsig: pointer to receive the VSI group + * + * This function will lookup the VSI entry in the XLT2 list and return + * the VSI group its associated with. + */ +static enum ice_status +ice_vsig_find_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 *vsig) +{ + if (!vsig || vsi >= ICE_MAX_VSI) + return ICE_ERR_PARAM; + + /* As long as there's a default or valid VSIG associated with the input + * VSI, the functions returns a success. Any handling of VSIG will be + * done by the following add, update or remove functions. + */ + *vsig = hw->blk[blk].xlt2.vsis[vsi].vsig; + + return 0; +} + +/** + * ice_vsig_alloc_val - allocate a new VSIG by value + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: the VSIG to allocate + * + * This function will allocate a given VSIG specified by the VSIG parameter. + */ +static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) { + INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst); + hw->blk[blk].xlt2.vsig_tbl[idx].in_use = true; + } + + return ICE_VSIG_VALUE(idx, hw->pf_id); +} + +/** + * ice_vsig_alloc - Finds a free entry and allocates a new VSIG + * @hw: pointer to the hardware structure + * @blk: HW block + * + * This function will iterate through the VSIG list and mark the first + * unused entry for the new VSIG entry as used and return that value. + */ +static u16 ice_vsig_alloc(struct ice_hw *hw, enum ice_block blk) +{ + u16 i; + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (!hw->blk[blk].xlt2.vsig_tbl[i].in_use) + return ice_vsig_alloc_val(hw, blk, i); + + return ICE_DEFAULT_VSIG; +} + +/** + * ice_find_dup_props_vsig - find VSI group with a specified set of properties + * @hw: pointer to the hardware structure + * @blk: HW block + * @chs: characteristic list + * @vsig: returns the VSIG with the matching profiles, if found + * + * Each VSIG is associated with a characteristic set; i.e. all VSIs under + * a group have the same characteristic set. To check if there exists a VSIG + * which has the same characteristics as the input characteristics; this + * function will iterate through the XLT2 list and return the VSIG that has a + * matching configuration. In order to make sure that priorities are accounted + * for, the list must match exactly, including the order in which the + * characteristics are listed. + */ +static enum ice_status +ice_find_dup_props_vsig(struct ice_hw *hw, enum ice_block blk, + struct list_head *chs, u16 *vsig) +{ + struct ice_xlt2 *xlt2 = &hw->blk[blk].xlt2; + u16 i; + + for (i = 0; i < xlt2->count; i++) + if (xlt2->vsig_tbl[i].in_use && + ice_match_prop_lst(chs, &xlt2->vsig_tbl[i].prop_lst)) { + *vsig = ICE_VSIG_VALUE(i, hw->pf_id); + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_vsig_free - free VSI group + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to remove + * + * The function will remove all VSIs associated with the input VSIG and move + * them to the DEFAULT_VSIG and mark the VSIG available. + */ +static enum ice_status +ice_vsig_free(struct ice_hw *hw, enum ice_block blk, u16 vsig) +{ + struct ice_vsig_prof *dtmp, *del; + struct ice_vsig_vsi *vsi_cur; + u16 idx; + + idx = vsig & ICE_VSIG_IDX_M; + if (idx >= ICE_MAX_VSIGS) + return ICE_ERR_PARAM; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + hw->blk[blk].xlt2.vsig_tbl[idx].in_use = false; + + vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + /* If the VSIG has at least 1 VSI then iterate through the + * list and remove the VSIs before deleting the group. + */ + if (vsi_cur) { + /* remove all vsis associated with this VSIG XLT2 entry */ + do { + struct ice_vsig_vsi *tmp = vsi_cur->next_vsi; + + vsi_cur->vsig = ICE_DEFAULT_VSIG; + vsi_cur->changed = 1; + vsi_cur->next_vsi = NULL; + vsi_cur = tmp; + } while (vsi_cur); + + /* NULL terminate head of VSI list */ + hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = NULL; + } + + /* free characteristic list */ + list_for_each_entry_safe(del, dtmp, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + list_del(&del->list); + devm_kfree(ice_hw_to_dev(hw), del); + } + + /* if VSIG characteristic list was cleared for reset + * re-initialize the list head + */ + INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst); + + return 0; +} + +/** + * ice_vsig_remove_vsi - remove VSI from VSIG + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsi: VSI to remove + * @vsig: VSI group to remove from + * + * The function will remove the input VSI from its VSI group and move it + * to the DEFAULT_VSIG. + */ +static enum ice_status +ice_vsig_remove_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig) +{ + struct ice_vsig_vsi **vsi_head, *vsi_cur, *vsi_tgt; + u16 idx; + + idx = vsig & ICE_VSIG_IDX_M; + + if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS) + return ICE_ERR_PARAM; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + /* entry already in default VSIG, don't have to remove */ + if (idx == ICE_DEFAULT_VSIG) + return 0; + + vsi_head = &hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + if (!(*vsi_head)) + return ICE_ERR_CFG; + + vsi_tgt = &hw->blk[blk].xlt2.vsis[vsi]; + vsi_cur = (*vsi_head); + + /* iterate the VSI list, skip over the entry to be removed */ + while (vsi_cur) { + if (vsi_tgt == vsi_cur) { + (*vsi_head) = vsi_cur->next_vsi; + break; + } + vsi_head = &vsi_cur->next_vsi; + vsi_cur = vsi_cur->next_vsi; + } + + /* verify if VSI was removed from group list */ + if (!vsi_cur) + return ICE_ERR_DOES_NOT_EXIST; + + vsi_cur->vsig = ICE_DEFAULT_VSIG; + vsi_cur->changed = 1; + vsi_cur->next_vsi = NULL; + + return 0; +} + +/** + * ice_vsig_add_mv_vsi - add or move a VSI to a VSI group + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsi: VSI to move + * @vsig: destination VSI group + * + * This function will move or add the input VSI to the target VSIG. + * The function will find the original VSIG the VSI belongs to and + * move the entry to the DEFAULT_VSIG, update the original VSIG and + * then move entry to the new VSIG. + */ +static enum ice_status +ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig) +{ + struct ice_vsig_vsi *tmp; + enum ice_status status; + u16 orig_vsig, idx; + + idx = vsig & ICE_VSIG_IDX_M; + + if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS) + return ICE_ERR_PARAM; + + /* if VSIG not in use and VSIG is not default type this VSIG + * doesn't exist. + */ + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use && + vsig != ICE_DEFAULT_VSIG) + return ICE_ERR_DOES_NOT_EXIST; + + status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig); + if (status) + return status; + + /* no update required if vsigs match */ + if (orig_vsig == vsig) + return 0; + + if (orig_vsig != ICE_DEFAULT_VSIG) { + /* remove entry from orig_vsig and add to default VSIG */ + status = ice_vsig_remove_vsi(hw, blk, vsi, orig_vsig); + if (status) + return status; + } + + if (idx == ICE_DEFAULT_VSIG) + return 0; + + /* Create VSI entry and add VSIG and prop_mask values */ + hw->blk[blk].xlt2.vsis[vsi].vsig = vsig; + hw->blk[blk].xlt2.vsis[vsi].changed = 1; + + /* Add new entry to the head of the VSIG list */ + tmp = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = + &hw->blk[blk].xlt2.vsis[vsi]; + hw->blk[blk].xlt2.vsis[vsi].next_vsi = tmp; + hw->blk[blk].xlt2.t[vsi] = vsig; + + return 0; +} + +/** + * ice_find_prof_id - find profile ID for a given field vector + * @hw: pointer to the hardware structure + * @blk: HW block + * @fv: field vector to search for + * @prof_id: receives the profile ID + */ +static enum ice_status +ice_find_prof_id(struct ice_hw *hw, enum ice_block blk, + struct ice_fv_word *fv, u8 *prof_id) +{ + struct ice_es *es = &hw->blk[blk].es; + u16 off, i; + + for (i = 0; i < es->count; i++) { + off = i * es->fvw; + + if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv))) + continue; + + *prof_id = i; + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_prof_id_rsrc_type - get profile ID resource type for a block type + * @blk: the block type + * @rsrc_type: pointer to variable to receive the resource type + */ +static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type) +{ + switch (blk) { + case ICE_BLK_RSS: + *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID; + break; + default: + return false; + } + return true; +} + +/** + * ice_tcam_ent_rsrc_type - get TCAM entry resource type for a block type + * @blk: the block type + * @rsrc_type: pointer to variable to receive the resource type + */ +static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type) +{ + switch (blk) { + case ICE_BLK_RSS: + *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM; + break; + default: + return false; + } + return true; +} + +/** + * ice_alloc_tcam_ent - allocate hardware TCAM entry + * @hw: pointer to the HW struct + * @blk: the block to allocate the TCAM for + * @tcam_idx: pointer to variable to receive the TCAM entry + * + * This function allocates a new entry in a Profile ID TCAM for a specific + * block. + */ +static enum ice_status +ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx) +{ + u16 res_type; + + if (!ice_tcam_ent_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx); +} + +/** + * ice_free_tcam_ent - free hardware TCAM entry + * @hw: pointer to the HW struct + * @blk: the block from which to free the TCAM entry + * @tcam_idx: the TCAM entry to free + * + * This function frees an entry in a Profile ID TCAM for a specific block. + */ +static enum ice_status +ice_free_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 tcam_idx) +{ + u16 res_type; + + if (!ice_tcam_ent_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_free_hw_res(hw, res_type, 1, &tcam_idx); +} + +/** + * ice_alloc_prof_id - allocate profile ID + * @hw: pointer to the HW struct + * @blk: the block to allocate the profile ID for + * @prof_id: pointer to variable to receive the profile ID + * + * This function allocates a new profile ID, which also corresponds to a Field + * Vector (Extraction Sequence) entry. + */ +static enum ice_status +ice_alloc_prof_id(struct ice_hw *hw, enum ice_block blk, u8 *prof_id) +{ + enum ice_status status; + u16 res_type; + u16 get_prof; + + if (!ice_prof_id_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + status = ice_alloc_hw_res(hw, res_type, 1, false, &get_prof); + if (!status) + *prof_id = (u8)get_prof; + + return status; +} + +/** + * ice_free_prof_id - free profile ID + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID to free + * + * This function frees a profile ID, which also corresponds to a Field Vector. + */ +static enum ice_status +ice_free_prof_id(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + u16 tmp_prof_id = (u16)prof_id; + u16 res_type; + + if (!ice_prof_id_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_free_hw_res(hw, res_type, 1, &tmp_prof_id); +} + +/** + * ice_prof_inc_ref - increment reference count for profile + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID for which to increment the reference count + */ +static enum ice_status +ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + if (prof_id > hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + hw->blk[blk].es.ref_count[prof_id]++; + + return 0; +} + +/** + * ice_write_es - write an extraction sequence to hardware + * @hw: pointer to the HW struct + * @blk: the block in which to write the extraction sequence + * @prof_id: the profile ID to write + * @fv: pointer to the extraction sequence to write - NULL to clear extraction + */ +static void +ice_write_es(struct ice_hw *hw, enum ice_block blk, u8 prof_id, + struct ice_fv_word *fv) +{ + u16 off; + + off = prof_id * hw->blk[blk].es.fvw; + if (!fv) { + memset(&hw->blk[blk].es.t[off], 0, + hw->blk[blk].es.fvw * sizeof(*fv)); + hw->blk[blk].es.written[prof_id] = false; + } else { + memcpy(&hw->blk[blk].es.t[off], fv, + hw->blk[blk].es.fvw * sizeof(*fv)); + } +} + +/** + * ice_prof_dec_ref - decrement reference count for profile + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID for which to decrement the reference count + */ +static enum ice_status +ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + if (prof_id > hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + if (hw->blk[blk].es.ref_count[prof_id] > 0) { + if (!--hw->blk[blk].es.ref_count[prof_id]) { + ice_write_es(hw, blk, prof_id, NULL); + return ice_free_prof_id(hw, blk, prof_id); + } + } + + return 0; +} + +/* Block / table section IDs */ +static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = { + /* SWITCH */ + { ICE_SID_XLT1_SW, + ICE_SID_XLT2_SW, + ICE_SID_PROFID_TCAM_SW, + ICE_SID_PROFID_REDIR_SW, + ICE_SID_FLD_VEC_SW + }, + + /* ACL */ + { ICE_SID_XLT1_ACL, + ICE_SID_XLT2_ACL, + ICE_SID_PROFID_TCAM_ACL, + ICE_SID_PROFID_REDIR_ACL, + ICE_SID_FLD_VEC_ACL + }, + + /* FD */ + { ICE_SID_XLT1_FD, + ICE_SID_XLT2_FD, + ICE_SID_PROFID_TCAM_FD, + ICE_SID_PROFID_REDIR_FD, + ICE_SID_FLD_VEC_FD + }, + + /* RSS */ + { ICE_SID_XLT1_RSS, + ICE_SID_XLT2_RSS, + ICE_SID_PROFID_TCAM_RSS, + ICE_SID_PROFID_REDIR_RSS, + ICE_SID_FLD_VEC_RSS + }, + + /* PE */ + { ICE_SID_XLT1_PE, + ICE_SID_XLT2_PE, + ICE_SID_PROFID_TCAM_PE, + ICE_SID_PROFID_REDIR_PE, + ICE_SID_FLD_VEC_PE + } +}; + +/** + * ice_init_sw_xlt1_db - init software XLT1 database from HW tables + * @hw: pointer to the hardware structure + * @blk: the HW block to initialize + */ +static void ice_init_sw_xlt1_db(struct ice_hw *hw, enum ice_block blk) +{ + u16 pt; + + for (pt = 0; pt < hw->blk[blk].xlt1.count; pt++) { + u8 ptg; + + ptg = hw->blk[blk].xlt1.t[pt]; + if (ptg != ICE_DEFAULT_PTG) { + ice_ptg_alloc_val(hw, blk, ptg); + ice_ptg_add_mv_ptype(hw, blk, pt, ptg); + } + } +} + +/** + * ice_init_sw_xlt2_db - init software XLT2 database from HW tables + * @hw: pointer to the hardware structure + * @blk: the HW block to initialize + */ +static void ice_init_sw_xlt2_db(struct ice_hw *hw, enum ice_block blk) +{ + u16 vsi; + + for (vsi = 0; vsi < hw->blk[blk].xlt2.count; vsi++) { + u16 vsig; + + vsig = hw->blk[blk].xlt2.t[vsi]; + if (vsig) { + ice_vsig_alloc_val(hw, blk, vsig); + ice_vsig_add_mv_vsi(hw, blk, vsi, vsig); + /* no changes at this time, since this has been + * initialized from the original package + */ + hw->blk[blk].xlt2.vsis[vsi].changed = 0; + } + } +} + +/** + * ice_init_sw_db - init software database from HW tables + * @hw: pointer to the hardware structure + */ +static void ice_init_sw_db(struct ice_hw *hw) +{ + u16 i; + + for (i = 0; i < ICE_BLK_COUNT; i++) { + ice_init_sw_xlt1_db(hw, (enum ice_block)i); + ice_init_sw_xlt2_db(hw, (enum ice_block)i); + } +} + +/** + * ice_fill_tbl - Reads content of a single table type into database + * @hw: pointer to the hardware structure + * @block_id: Block ID of the table to copy + * @sid: Section ID of the table to copy + * + * Will attempt to read the entire content of a given table of a single block + * into the driver database. We assume that the buffer will always + * be as large or larger than the data contained in the package. If + * this condition is not met, there is most likely an error in the package + * contents. + */ +static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) +{ + u32 dst_len, sect_len, offset = 0; + struct ice_prof_redir_section *pr; + struct ice_prof_id_section *pid; + struct ice_xlt1_section *xlt1; + struct ice_xlt2_section *xlt2; + struct ice_sw_fv_section *es; + struct ice_pkg_enum state; + u8 *src, *dst; + void *sect; + + /* if the HW segment pointer is null then the first iteration of + * ice_pkg_enum_section() will fail. In this case the HW tables will + * not be filled and return success. + */ + if (!hw->seg) { + ice_debug(hw, ICE_DBG_PKG, "hw->seg is NULL, tables are not filled\n"); + return; + } + + memset(&state, 0, sizeof(state)); + + sect = ice_pkg_enum_section(hw->seg, &state, sid); + + while (sect) { + switch (sid) { + case ICE_SID_XLT1_SW: + case ICE_SID_XLT1_FD: + case ICE_SID_XLT1_RSS: + case ICE_SID_XLT1_ACL: + case ICE_SID_XLT1_PE: + xlt1 = (struct ice_xlt1_section *)sect; + src = xlt1->value; + sect_len = le16_to_cpu(xlt1->count) * + sizeof(*hw->blk[block_id].xlt1.t); + dst = hw->blk[block_id].xlt1.t; + dst_len = hw->blk[block_id].xlt1.count * + sizeof(*hw->blk[block_id].xlt1.t); + break; + case ICE_SID_XLT2_SW: + case ICE_SID_XLT2_FD: + case ICE_SID_XLT2_RSS: + case ICE_SID_XLT2_ACL: + case ICE_SID_XLT2_PE: + xlt2 = (struct ice_xlt2_section *)sect; + src = (__force u8 *)xlt2->value; + sect_len = le16_to_cpu(xlt2->count) * + sizeof(*hw->blk[block_id].xlt2.t); + dst = (u8 *)hw->blk[block_id].xlt2.t; + dst_len = hw->blk[block_id].xlt2.count * + sizeof(*hw->blk[block_id].xlt2.t); + break; + case ICE_SID_PROFID_TCAM_SW: + case ICE_SID_PROFID_TCAM_FD: + case ICE_SID_PROFID_TCAM_RSS: + case ICE_SID_PROFID_TCAM_ACL: + case ICE_SID_PROFID_TCAM_PE: + pid = (struct ice_prof_id_section *)sect; + src = (u8 *)pid->entry; + sect_len = le16_to_cpu(pid->count) * + sizeof(*hw->blk[block_id].prof.t); + dst = (u8 *)hw->blk[block_id].prof.t; + dst_len = hw->blk[block_id].prof.count * + sizeof(*hw->blk[block_id].prof.t); + break; + case ICE_SID_PROFID_REDIR_SW: + case ICE_SID_PROFID_REDIR_FD: + case ICE_SID_PROFID_REDIR_RSS: + case ICE_SID_PROFID_REDIR_ACL: + case ICE_SID_PROFID_REDIR_PE: + pr = (struct ice_prof_redir_section *)sect; + src = pr->redir_value; + sect_len = le16_to_cpu(pr->count) * + sizeof(*hw->blk[block_id].prof_redir.t); + dst = hw->blk[block_id].prof_redir.t; + dst_len = hw->blk[block_id].prof_redir.count * + sizeof(*hw->blk[block_id].prof_redir.t); + break; + case ICE_SID_FLD_VEC_SW: + case ICE_SID_FLD_VEC_FD: + case ICE_SID_FLD_VEC_RSS: + case ICE_SID_FLD_VEC_ACL: + case ICE_SID_FLD_VEC_PE: + es = (struct ice_sw_fv_section *)sect; + src = (u8 *)es->fv; + sect_len = (u32)(le16_to_cpu(es->count) * + hw->blk[block_id].es.fvw) * + sizeof(*hw->blk[block_id].es.t); + dst = (u8 *)hw->blk[block_id].es.t; + dst_len = (u32)(hw->blk[block_id].es.count * + hw->blk[block_id].es.fvw) * + sizeof(*hw->blk[block_id].es.t); + break; + default: + return; + } + + /* if the section offset exceeds destination length, terminate + * table fill. + */ + if (offset > dst_len) + return; + + /* if the sum of section size and offset exceed destination size + * then we are out of bounds of the HW table size for that PF. + * Changing section length to fill the remaining table space + * of that PF. + */ + if ((offset + sect_len) > dst_len) + sect_len = dst_len - offset; + + memcpy(dst + offset, src, sect_len); + offset += sect_len; + sect = ice_pkg_enum_section(NULL, &state, sid); + } +} + +/** + * ice_fill_blk_tbls - Read package context for tables + * @hw: pointer to the hardware structure + * + * Reads the current package contents and populates the driver + * database with the data iteratively for all advanced feature + * blocks. Assume that the HW tables have been allocated. + */ +void ice_fill_blk_tbls(struct ice_hw *hw) +{ + u8 i; + + for (i = 0; i < ICE_BLK_COUNT; i++) { + enum ice_block blk_id = (enum ice_block)i; + + ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt1.sid); + ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt2.sid); + ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof.sid); + ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof_redir.sid); + ice_fill_tbl(hw, blk_id, hw->blk[blk_id].es.sid); + } + + ice_init_sw_db(hw); +} + +/** + * ice_free_prof_map - free profile map + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_free_prof_map(struct ice_hw *hw, u8 blk_idx) +{ + struct ice_es *es = &hw->blk[blk_idx].es; + struct ice_prof_map *del, *tmp; + + mutex_lock(&es->prof_map_lock); + list_for_each_entry_safe(del, tmp, &es->prof_map, list) { + list_del(&del->list); + devm_kfree(ice_hw_to_dev(hw), del); + } + INIT_LIST_HEAD(&es->prof_map); + mutex_unlock(&es->prof_map_lock); +} + +/** + * ice_free_flow_profs - free flow profile entries + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx) +{ + struct ice_flow_prof *p, *tmp; + + mutex_lock(&hw->fl_profs_locks[blk_idx]); + list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) { + list_del(&p->l_entry); + devm_kfree(ice_hw_to_dev(hw), p); + } + mutex_unlock(&hw->fl_profs_locks[blk_idx]); + + /* if driver is in reset and tables are being cleared + * re-initialize the flow profile list heads + */ + INIT_LIST_HEAD(&hw->fl_profs[blk_idx]); +} + +/** + * ice_free_vsig_tbl - free complete VSIG table entries + * @hw: pointer to the hardware structure + * @blk: the HW block on which to free the VSIG table entries + */ +static void ice_free_vsig_tbl(struct ice_hw *hw, enum ice_block blk) +{ + u16 i; + + if (!hw->blk[blk].xlt2.vsig_tbl) + return; + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) + ice_vsig_free(hw, blk, i); +} + +/** + * ice_free_hw_tbls - free hardware table memory + * @hw: pointer to the hardware structure + */ +void ice_free_hw_tbls(struct ice_hw *hw) +{ + struct ice_rss_cfg *r, *rt; + u8 i; + + for (i = 0; i < ICE_BLK_COUNT; i++) { + if (hw->blk[i].is_list_init) { + struct ice_es *es = &hw->blk[i].es; + + ice_free_prof_map(hw, i); + mutex_destroy(&es->prof_map_lock); + + ice_free_flow_profs(hw, i); + mutex_destroy(&hw->fl_profs_locks[i]); + + hw->blk[i].is_list_init = false; + } + ice_free_vsig_tbl(hw, (enum ice_block)i); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.t); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsig_tbl); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsis); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof.t); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof_redir.t); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written); + } + + list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + mutex_destroy(&hw->rss_locks); + memset(hw->blk, 0, sizeof(hw->blk)); +} + +/** + * ice_init_flow_profs - init flow profile locks and list heads + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_init_flow_profs(struct ice_hw *hw, u8 blk_idx) +{ + mutex_init(&hw->fl_profs_locks[blk_idx]); + INIT_LIST_HEAD(&hw->fl_profs[blk_idx]); +} + +/** + * ice_clear_hw_tbls - clear HW tables and flow profiles + * @hw: pointer to the hardware structure + */ +void ice_clear_hw_tbls(struct ice_hw *hw) +{ + u8 i; + + for (i = 0; i < ICE_BLK_COUNT; i++) { + struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir; + struct ice_prof_tcam *prof = &hw->blk[i].prof; + struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1; + struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2; + struct ice_es *es = &hw->blk[i].es; + + if (hw->blk[i].is_list_init) { + ice_free_prof_map(hw, i); + ice_free_flow_profs(hw, i); + } + + ice_free_vsig_tbl(hw, (enum ice_block)i); + + memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes)); + memset(xlt1->ptg_tbl, 0, + ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl)); + memset(xlt1->t, 0, xlt1->count * sizeof(*xlt1->t)); + + memset(xlt2->vsis, 0, xlt2->count * sizeof(*xlt2->vsis)); + memset(xlt2->vsig_tbl, 0, + xlt2->count * sizeof(*xlt2->vsig_tbl)); + memset(xlt2->t, 0, xlt2->count * sizeof(*xlt2->t)); + + memset(prof->t, 0, prof->count * sizeof(*prof->t)); + memset(prof_redir->t, 0, + prof_redir->count * sizeof(*prof_redir->t)); + + memset(es->t, 0, es->count * sizeof(*es->t)); + memset(es->ref_count, 0, es->count * sizeof(*es->ref_count)); + memset(es->written, 0, es->count * sizeof(*es->written)); + } +} + +/** + * ice_init_hw_tbls - init hardware table memory + * @hw: pointer to the hardware structure + */ +enum ice_status ice_init_hw_tbls(struct ice_hw *hw) +{ + u8 i; + + mutex_init(&hw->rss_locks); + INIT_LIST_HEAD(&hw->rss_list_head); + for (i = 0; i < ICE_BLK_COUNT; i++) { + struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir; + struct ice_prof_tcam *prof = &hw->blk[i].prof; + struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1; + struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2; + struct ice_es *es = &hw->blk[i].es; + u16 j; + + if (hw->blk[i].is_list_init) + continue; + + ice_init_flow_profs(hw, i); + mutex_init(&es->prof_map_lock); + INIT_LIST_HEAD(&es->prof_map); + hw->blk[i].is_list_init = true; + + hw->blk[i].overwrite = blk_sizes[i].overwrite; + es->reverse = blk_sizes[i].reverse; + + xlt1->sid = ice_blk_sids[i][ICE_SID_XLT1_OFF]; + xlt1->count = blk_sizes[i].xlt1; + + xlt1->ptypes = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count, + sizeof(*xlt1->ptypes), GFP_KERNEL); + + if (!xlt1->ptypes) + goto err; + + xlt1->ptg_tbl = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_PTGS, + sizeof(*xlt1->ptg_tbl), + GFP_KERNEL); + + if (!xlt1->ptg_tbl) + goto err; + + xlt1->t = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count, + sizeof(*xlt1->t), GFP_KERNEL); + if (!xlt1->t) + goto err; + + xlt2->sid = ice_blk_sids[i][ICE_SID_XLT2_OFF]; + xlt2->count = blk_sizes[i].xlt2; + + xlt2->vsis = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count, + sizeof(*xlt2->vsis), GFP_KERNEL); + + if (!xlt2->vsis) + goto err; + + xlt2->vsig_tbl = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count, + sizeof(*xlt2->vsig_tbl), + GFP_KERNEL); + if (!xlt2->vsig_tbl) + goto err; + + for (j = 0; j < xlt2->count; j++) + INIT_LIST_HEAD(&xlt2->vsig_tbl[j].prop_lst); + + xlt2->t = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count, + sizeof(*xlt2->t), GFP_KERNEL); + if (!xlt2->t) + goto err; + + prof->sid = ice_blk_sids[i][ICE_SID_PR_OFF]; + prof->count = blk_sizes[i].prof_tcam; + prof->max_prof_id = blk_sizes[i].prof_id; + prof->cdid_bits = blk_sizes[i].prof_cdid_bits; + prof->t = devm_kcalloc(ice_hw_to_dev(hw), prof->count, + sizeof(*prof->t), GFP_KERNEL); + + if (!prof->t) + goto err; + + prof_redir->sid = ice_blk_sids[i][ICE_SID_PR_REDIR_OFF]; + prof_redir->count = blk_sizes[i].prof_redir; + prof_redir->t = devm_kcalloc(ice_hw_to_dev(hw), + prof_redir->count, + sizeof(*prof_redir->t), + GFP_KERNEL); + + if (!prof_redir->t) + goto err; + + es->sid = ice_blk_sids[i][ICE_SID_ES_OFF]; + es->count = blk_sizes[i].es; + es->fvw = blk_sizes[i].fvw; + es->t = devm_kcalloc(ice_hw_to_dev(hw), + (u32)(es->count * es->fvw), + sizeof(*es->t), GFP_KERNEL); + if (!es->t) + goto err; + + es->ref_count = devm_kcalloc(ice_hw_to_dev(hw), es->count, + sizeof(*es->ref_count), + GFP_KERNEL); + + es->written = devm_kcalloc(ice_hw_to_dev(hw), es->count, + sizeof(*es->written), GFP_KERNEL); + if (!es->ref_count) + goto err; + } + return 0; + +err: + ice_free_hw_tbls(hw); + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_prof_gen_key - generate profile ID key + * @hw: pointer to the HW struct + * @blk: the block in which to write profile ID to + * @ptg: packet type group (PTG) portion of key + * @vsig: VSIG portion of key + * @cdid: CDID portion of key + * @flags: flag portion of key + * @vl_msk: valid mask + * @dc_msk: don't care mask + * @nm_msk: never match mask + * @key: output of profile ID key + */ +static enum ice_status +ice_prof_gen_key(struct ice_hw *hw, enum ice_block blk, u8 ptg, u16 vsig, + u8 cdid, u16 flags, u8 vl_msk[ICE_TCAM_KEY_VAL_SZ], + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], u8 nm_msk[ICE_TCAM_KEY_VAL_SZ], + u8 key[ICE_TCAM_KEY_SZ]) +{ + struct ice_prof_id_key inkey; + + inkey.xlt1 = ptg; + inkey.xlt2_cdid = cpu_to_le16(vsig); + inkey.flags = cpu_to_le16(flags); + + switch (hw->blk[blk].prof.cdid_bits) { + case 0: + break; + case 2: +#define ICE_CD_2_M 0xC000U +#define ICE_CD_2_S 14 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_2_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_2_S); + break; + case 4: +#define ICE_CD_4_M 0xF000U +#define ICE_CD_4_S 12 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_4_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_4_S); + break; + case 8: +#define ICE_CD_8_M 0xFF00U +#define ICE_CD_8_S 16 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_8_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_8_S); + break; + default: + ice_debug(hw, ICE_DBG_PKG, "Error in profile config\n"); + break; + } + + return ice_set_key(key, ICE_TCAM_KEY_SZ, (u8 *)&inkey, vl_msk, dc_msk, + nm_msk, 0, ICE_TCAM_KEY_SZ / 2); +} + +/** + * ice_tcam_write_entry - write TCAM entry + * @hw: pointer to the HW struct + * @blk: the block in which to write profile ID to + * @idx: the entry index to write to + * @prof_id: profile ID + * @ptg: packet type group (PTG) portion of key + * @vsig: VSIG portion of key + * @cdid: CDID portion of key + * @flags: flag portion of key + * @vl_msk: valid mask + * @dc_msk: don't care mask + * @nm_msk: never match mask + */ +static enum ice_status +ice_tcam_write_entry(struct ice_hw *hw, enum ice_block blk, u16 idx, + u8 prof_id, u8 ptg, u16 vsig, u8 cdid, u16 flags, + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ], + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ]) +{ + struct ice_prof_tcam_entry; + enum ice_status status; + + status = ice_prof_gen_key(hw, blk, ptg, vsig, cdid, flags, vl_msk, + dc_msk, nm_msk, hw->blk[blk].prof.t[idx].key); + if (!status) { + hw->blk[blk].prof.t[idx].addr = cpu_to_le16(idx); + hw->blk[blk].prof.t[idx].prof_id = prof_id; + } + + return status; +} + +/** + * ice_vsig_get_ref - returns number of VSIs belong to a VSIG + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to query + * @refs: pointer to variable to receive the reference count + */ +static enum ice_status +ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_vsi *ptr; + + *refs = 0; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + ptr = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + while (ptr) { + (*refs)++; + ptr = ptr->next_vsi; + } + + return 0; +} + +/** + * ice_has_prof_vsig - check to see if VSIG has a specific profile + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to check against + * @hdl: profile handle + */ +static bool +ice_has_prof_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_prof *ent; + + list_for_each_entry(ent, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + if (ent->profile_cookie == hdl) + return true; + + ice_debug(hw, ICE_DBG_INIT, + "Characteristic list for VSI group %d not found.\n", + vsig); + return false; +} + +/** + * ice_prof_bld_es - build profile ID extraction sequence changes + * @hw: pointer to the HW struct + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_es(struct ice_hw *hw, enum ice_block blk, + struct ice_buf_build *bld, struct list_head *chgs) +{ + u16 vec_size = hw->blk[blk].es.fvw * sizeof(struct ice_fv_word); + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_PTG_ES_ADD && tmp->add_prof) { + u16 off = tmp->prof_id * hw->blk[blk].es.fvw; + struct ice_pkg_es *p; + u32 id; + + id = ice_sect_id(blk, ICE_VEC_TBL); + p = (struct ice_pkg_es *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p) + + vec_size - + sizeof(p->es[0])); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->prof_id); + + memcpy(p->es, &hw->blk[blk].es.t[off], vec_size); + } + + return 0; +} + +/** + * ice_prof_bld_tcam - build profile ID TCAM changes + * @hw: pointer to the HW struct + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_tcam(struct ice_hw *hw, enum ice_block blk, + struct ice_buf_build *bld, struct list_head *chgs) +{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_TCAM_ADD && tmp->add_tcam_idx) { + struct ice_prof_id_section *p; + u32 id; + + id = ice_sect_id(blk, ICE_PROF_TCAM); + p = (struct ice_prof_id_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->entry[0].addr = cpu_to_le16(tmp->tcam_idx); + p->entry[0].prof_id = tmp->prof_id; + + memcpy(p->entry[0].key, + &hw->blk[blk].prof.t[tmp->tcam_idx].key, + sizeof(hw->blk[blk].prof.t->key)); + } + + return 0; +} + +/** + * ice_prof_bld_xlt1 - build XLT1 changes + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_xlt1(enum ice_block blk, struct ice_buf_build *bld, + struct list_head *chgs) +{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_PTG_ES_ADD && tmp->add_ptg) { + struct ice_xlt1_section *p; + u32 id; + + id = ice_sect_id(blk, ICE_XLT1); + p = (struct ice_xlt1_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->ptype); + p->value[0] = tmp->ptg; + } + + return 0; +} + +/** + * ice_prof_bld_xlt2 - build XLT2 changes + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_xlt2(enum ice_block blk, struct ice_buf_build *bld, + struct list_head *chgs) +{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) { + struct ice_xlt2_section *p; + u32 id; + + switch (tmp->type) { + case ICE_VSIG_ADD: + case ICE_VSI_MOVE: + case ICE_VSIG_REM: + id = ice_sect_id(blk, ICE_XLT2); + p = (struct ice_xlt2_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->vsi); + p->value[0] = cpu_to_le16(tmp->vsig); + break; + default: + break; + } + } + + return 0; +} + +/** + * ice_upd_prof_hw - update hardware using the change list + * @hw: pointer to the HW struct + * @blk: hardware block + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk, + struct list_head *chgs) +{ + struct ice_buf_build *b; + struct ice_chs_chg *tmp; + enum ice_status status; + u16 pkg_sects; + u16 xlt1 = 0; + u16 xlt2 = 0; + u16 tcam = 0; + u16 es = 0; + u16 sects; + + /* count number of sections we need */ + list_for_each_entry(tmp, chgs, list_entry) { + switch (tmp->type) { + case ICE_PTG_ES_ADD: + if (tmp->add_ptg) + xlt1++; + if (tmp->add_prof) + es++; + break; + case ICE_TCAM_ADD: + tcam++; + break; + case ICE_VSIG_ADD: + case ICE_VSI_MOVE: + case ICE_VSIG_REM: + xlt2++; + break; + default: + break; + } + } + sects = xlt1 + xlt2 + tcam + es; + + if (!sects) + return 0; + + /* Build update package buffer */ + b = ice_pkg_buf_alloc(hw); + if (!b) + return ICE_ERR_NO_MEMORY; + + status = ice_pkg_buf_reserve_section(b, sects); + if (status) + goto error_tmp; + + /* Preserve order of table update: ES, TCAM, PTG, VSIG */ + if (es) { + status = ice_prof_bld_es(hw, blk, b, chgs); + if (status) + goto error_tmp; + } + + if (tcam) { + status = ice_prof_bld_tcam(hw, blk, b, chgs); + if (status) + goto error_tmp; + } + + if (xlt1) { + status = ice_prof_bld_xlt1(blk, b, chgs); + if (status) + goto error_tmp; + } + + if (xlt2) { + status = ice_prof_bld_xlt2(blk, b, chgs); + if (status) + goto error_tmp; + } + + /* After package buffer build check if the section count in buffer is + * non-zero and matches the number of sections detected for package + * update. + */ + pkg_sects = ice_pkg_buf_get_active_sections(b); + if (!pkg_sects || pkg_sects != sects) { + status = ICE_ERR_INVAL_SIZE; + goto error_tmp; + } + + /* update package */ + status = ice_update_pkg(hw, ice_pkg_buf(b), 1); + if (status == ICE_ERR_AQ_ERROR) + ice_debug(hw, ICE_DBG_INIT, "Unable to update HW profile\n"); + +error_tmp: + ice_pkg_buf_free(hw, b); + return status; +} + +/** + * ice_add_prof - add profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits) + * @es: extraction sequence (length of array is determined by the block) + * + * This function registers a profile, which matches a set of PTGs with a + * particular extraction sequence. While the hardware profile is allocated + * it will not be written until the first call to ice_add_flow that specifies + * the ID value used here. + */ +enum ice_status +ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], + struct ice_fv_word *es) +{ + u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE); + DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_prof_map *prof; + enum ice_status status; + u32 byte = 0; + u8 prof_id; + + bitmap_zero(ptgs_used, ICE_XLT1_CNT); + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + + /* search for existing profile */ + status = ice_find_prof_id(hw, blk, es, &prof_id); + if (status) { + /* allocate profile ID */ + status = ice_alloc_prof_id(hw, blk, &prof_id); + if (status) + goto err_ice_add_prof; + + /* and write new es */ + ice_write_es(hw, blk, prof_id, es); + } + + ice_prof_inc_ref(hw, blk, prof_id); + + /* add profile info */ + prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL); + if (!prof) + goto err_ice_add_prof; + + prof->profile_cookie = id; + prof->prof_id = prof_id; + prof->ptg_cnt = 0; + prof->context = 0; + + /* build list of ptgs */ + while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) { + u32 bit; + + if (!ptypes[byte]) { + bytes--; + byte++; + continue; + } + + /* Examine 8 bits per byte */ + for_each_set_bit(bit, (unsigned long *)&ptypes[byte], + BITS_PER_BYTE) { + u16 ptype; + u8 ptg; + u8 m; + + ptype = byte * BITS_PER_BYTE + bit; + + /* The package should place all ptypes in a non-zero + * PTG, so the following call should never fail. + */ + if (ice_ptg_find_ptype(hw, blk, ptype, &ptg)) + continue; + + /* If PTG is already added, skip and continue */ + if (test_bit(ptg, ptgs_used)) + continue; + + set_bit(ptg, ptgs_used); + prof->ptg[prof->ptg_cnt] = ptg; + + if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE) + break; + + /* nothing left in byte, then exit */ + m = ~((1 << (bit + 1)) - 1); + if (!(ptypes[byte] & m)) + break; + } + + bytes--; + byte++; + } + + list_add(&prof->list, &hw->blk[blk].es.prof_map); + status = 0; + +err_ice_add_prof: + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + return status; +} + +/** + * ice_search_prof_id_low - Search for a profile tracking ID low level + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will search for a profile tracking ID which was previously added. This + * version assumes that the caller has already acquired the prof map lock. + */ +static struct ice_prof_map * +ice_search_prof_id_low(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *entry = NULL; + struct ice_prof_map *map; + + list_for_each_entry(map, &hw->blk[blk].es.prof_map, list) + if (map->profile_cookie == id) { + entry = map; + break; + } + + return entry; +} + +/** + * ice_search_prof_id - Search for a profile tracking ID + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will search for a profile tracking ID which was previously added. + */ +static struct ice_prof_map * +ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *entry; + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + entry = ice_search_prof_id_low(hw, blk, id); + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + + return entry; +} + +/** + * ice_vsig_prof_id_count - count profiles in a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG to remove the profile from + */ +static u16 +ice_vsig_prof_id_count(struct ice_hw *hw, enum ice_block blk, u16 vsig) +{ + u16 idx = vsig & ICE_VSIG_IDX_M, count = 0; + struct ice_vsig_prof *p; + + list_for_each_entry(p, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + count++; + + return count; +} + +/** + * ice_rel_tcam_idx - release a TCAM index + * @hw: pointer to the HW struct + * @blk: hardware block + * @idx: the index to release + */ +static enum ice_status +ice_rel_tcam_idx(struct ice_hw *hw, enum ice_block blk, u16 idx) +{ + /* Masks to invoke a never match entry */ + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFE, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + enum ice_status status; + + /* write the TCAM entry */ + status = ice_tcam_write_entry(hw, blk, idx, 0, 0, 0, 0, 0, vl_msk, + dc_msk, nm_msk); + if (status) + return status; + + /* release the TCAM entry */ + status = ice_free_tcam_ent(hw, blk, idx); + + return status; +} + +/** + * ice_rem_prof_id - remove one profile from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @prof: pointer to profile structure to remove + */ +static enum ice_status +ice_rem_prof_id(struct ice_hw *hw, enum ice_block blk, + struct ice_vsig_prof *prof) +{ + enum ice_status status; + u16 i; + + for (i = 0; i < prof->tcam_count; i++) + if (prof->tcam[i].in_use) { + prof->tcam[i].in_use = false; + status = ice_rel_tcam_idx(hw, blk, + prof->tcam[i].tcam_idx); + if (status) + return ICE_ERR_HW_TABLE; + } + + return 0; +} + +/** + * ice_rem_vsig - remove VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG to remove + * @chg: the change list + */ +static enum ice_status +ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *chg) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_vsi *vsi_cur; + struct ice_vsig_prof *d, *t; + enum ice_status status; + + /* remove TCAM entries */ + list_for_each_entry_safe(d, t, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + status = ice_rem_prof_id(hw, blk, d); + if (status) + return status; + + list_del(&d->list); + devm_kfree(ice_hw_to_dev(hw), d); + } + + /* Move all VSIS associated with this VSIG to the default VSIG */ + vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + /* If the VSIG has at least 1 VSI then iterate through the list + * and remove the VSIs before deleting the group. + */ + if (vsi_cur) + do { + struct ice_vsig_vsi *tmp = vsi_cur->next_vsi; + struct ice_chs_chg *p; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), + GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + p->type = ICE_VSIG_REM; + p->orig_vsig = vsig; + p->vsig = ICE_DEFAULT_VSIG; + p->vsi = vsi_cur - hw->blk[blk].xlt2.vsis; + + list_add(&p->list_entry, chg); + + vsi_cur = tmp; + } while (vsi_cur); + + return ice_vsig_free(hw, blk, vsig); +} + +/** + * ice_rem_prof_id_vsig - remove a specific profile from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG to remove the profile from + * @hdl: profile handle indicating which profile to remove + * @chg: list to receive a record of changes + */ +static enum ice_status +ice_rem_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, + struct list_head *chg) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_prof *p, *t; + enum ice_status status; + + list_for_each_entry_safe(p, t, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + if (p->profile_cookie == hdl) { + if (ice_vsig_prof_id_count(hw, blk, vsig) == 1) + /* this is the last profile, remove the VSIG */ + return ice_rem_vsig(hw, blk, vsig, chg); + + status = ice_rem_prof_id(hw, blk, p); + if (!status) { + list_del(&p->list); + devm_kfree(ice_hw_to_dev(hw), p); + } + return status; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_rem_flow_all - remove all flows with a particular profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + */ +static enum ice_status +ice_rem_flow_all(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_chs_chg *del, *tmp; + enum ice_status status; + struct list_head chg; + u16 i; + + INIT_LIST_HEAD(&chg); + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) { + if (ice_has_prof_vsig(hw, blk, i, id)) { + status = ice_rem_prof_id_vsig(hw, blk, i, id, + &chg); + if (status) + goto err_ice_rem_flow_all; + } + } + + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_rem_flow_all: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + return status; +} + +/** + * ice_rem_prof - remove profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will remove the profile specified by the ID parameter, which was + * previously created through ice_add_prof. If any existing entries + * are associated with this profile, they will be removed as well. + */ +enum ice_status ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *pmap; + enum ice_status status; + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + + pmap = ice_search_prof_id_low(hw, blk, id); + if (!pmap) { + status = ICE_ERR_DOES_NOT_EXIST; + goto err_ice_rem_prof; + } + + /* remove all flows with this profile */ + status = ice_rem_flow_all(hw, blk, pmap->profile_cookie); + if (status) + goto err_ice_rem_prof; + + /* dereference profile, and possibly remove */ + ice_prof_dec_ref(hw, blk, pmap->prof_id); + + list_del(&pmap->list); + devm_kfree(ice_hw_to_dev(hw), pmap); + +err_ice_rem_prof: + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + return status; +} + +/** + * ice_get_prof - get profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @hdl: profile handle + * @chg: change list + */ +static enum ice_status +ice_get_prof(struct ice_hw *hw, enum ice_block blk, u64 hdl, + struct list_head *chg) +{ + struct ice_prof_map *map; + struct ice_chs_chg *p; + u16 i; + + /* Get the details on the profile specified by the handle ID */ + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + for (i = 0; i < map->ptg_cnt; i++) + if (!hw->blk[blk].es.written[map->prof_id]) { + /* add ES to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), + GFP_KERNEL); + if (!p) + goto err_ice_get_prof; + + p->type = ICE_PTG_ES_ADD; + p->ptype = 0; + p->ptg = map->ptg[i]; + p->add_ptg = 0; + + p->add_prof = 1; + p->prof_id = map->prof_id; + + hw->blk[blk].es.written[map->prof_id] = true; + + list_add(&p->list_entry, chg); + } + + return 0; + +err_ice_get_prof: + /* let caller clean up the change list */ + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_get_profs_vsig - get a copy of the list of profiles from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG from which to copy the list + * @lst: output list + * + * This routine makes a copy of the list of profiles in the specified VSIG. + */ +static enum ice_status +ice_get_profs_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *lst) +{ + struct ice_vsig_prof *ent1, *ent2; + u16 idx = vsig & ICE_VSIG_IDX_M; + + list_for_each_entry(ent1, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + struct ice_vsig_prof *p; + + /* copy to the input list */ + p = devm_kmemdup(ice_hw_to_dev(hw), ent1, sizeof(*p), + GFP_KERNEL); + if (!p) + goto err_ice_get_profs_vsig; + + list_add_tail(&p->list, lst); + } + + return 0; + +err_ice_get_profs_vsig: + list_for_each_entry_safe(ent1, ent2, lst, list) { + list_del(&ent1->list); + devm_kfree(ice_hw_to_dev(hw), ent1); + } + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_add_prof_to_lst - add profile entry to a list + * @hw: pointer to the HW struct + * @blk: hardware block + * @lst: the list to be added to + * @hdl: profile handle of entry to add + */ +static enum ice_status +ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk, + struct list_head *lst, u64 hdl) +{ + struct ice_prof_map *map; + struct ice_vsig_prof *p; + u16 i; + + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + p->profile_cookie = map->profile_cookie; + p->prof_id = map->prof_id; + p->tcam_count = map->ptg_cnt; + + for (i = 0; i < map->ptg_cnt; i++) { + p->tcam[i].prof_id = map->prof_id; + p->tcam[i].tcam_idx = ICE_INVALID_TCAM; + p->tcam[i].ptg = map->ptg[i]; + } + + list_add(&p->list, lst); + + return 0; +} + +/** + * ice_move_vsi - move VSI to another VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI to move + * @vsig: the VSIG to move the VSI to + * @chg: the change list + */ +static enum ice_status +ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + u16 orig_vsig; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig); + if (!status) + status = ice_vsig_add_mv_vsi(hw, blk, vsi, vsig); + + if (status) { + devm_kfree(ice_hw_to_dev(hw), p); + return status; + } + + p->type = ICE_VSI_MOVE; + p->vsi = vsi; + p->orig_vsig = orig_vsig; + p->vsig = vsig; + + list_add(&p->list_entry, chg); + + return 0; +} + +/** + * ice_prof_tcam_ena_dis - add enable or disable TCAM change + * @hw: pointer to the HW struct + * @blk: hardware block + * @enable: true to enable, false to disable + * @vsig: the VSIG of the TCAM entry + * @tcam: pointer the TCAM info structure of the TCAM to disable + * @chg: the change list + * + * This function appends an enable or disable TCAM entry in the change log + */ +static enum ice_status +ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, + u16 vsig, struct ice_tcam_inf *tcam, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + + /* Default: enable means change the low flag bit to don't care */ + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 }; + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + + /* if disabling, free the TCAM */ + if (!enable) { + status = ice_free_tcam_ent(hw, blk, tcam->tcam_idx); + tcam->tcam_idx = 0; + tcam->in_use = 0; + return status; + } + + /* for re-enabling, reallocate a TCAM */ + status = ice_alloc_tcam_ent(hw, blk, &tcam->tcam_idx); + if (status) + return status; + + /* add TCAM to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id, + tcam->ptg, vsig, 0, 0, vl_msk, dc_msk, + nm_msk); + if (status) + goto err_ice_prof_tcam_ena_dis; + + tcam->in_use = 1; + + p->type = ICE_TCAM_ADD; + p->add_tcam_idx = true; + p->prof_id = tcam->prof_id; + p->ptg = tcam->ptg; + p->vsig = 0; + p->tcam_idx = tcam->tcam_idx; + + /* log change */ + list_add(&p->list_entry, chg); + + return 0; + +err_ice_prof_tcam_ena_dis: + devm_kfree(ice_hw_to_dev(hw), p); + return status; +} + +/** + * ice_adj_prof_priorities - adjust profile based on priorities + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG for which to adjust profile priorities + * @chg: the change list + */ +static enum ice_status +ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *chg) +{ + DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_vsig_prof *t; + enum ice_status status; + u16 idx; + + bitmap_zero(ptgs_used, ICE_XLT1_CNT); + idx = vsig & ICE_VSIG_IDX_M; + + /* Priority is based on the order in which the profiles are added. The + * newest added profile has highest priority and the oldest added + * profile has the lowest priority. Since the profile property list for + * a VSIG is sorted from newest to oldest, this code traverses the list + * in order and enables the first of each PTG that it finds (that is not + * already enabled); it also disables any duplicate PTGs that it finds + * in the older profiles (that are currently enabled). + */ + + list_for_each_entry(t, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + u16 i; + + for (i = 0; i < t->tcam_count; i++) { + /* Scan the priorities from newest to oldest. + * Make sure that the newest profiles take priority. + */ + if (test_bit(t->tcam[i].ptg, ptgs_used) && + t->tcam[i].in_use) { + /* need to mark this PTG as never match, as it + * was already in use and therefore duplicate + * (and lower priority) + */ + status = ice_prof_tcam_ena_dis(hw, blk, false, + vsig, + &t->tcam[i], + chg); + if (status) + return status; + } else if (!test_bit(t->tcam[i].ptg, ptgs_used) && + !t->tcam[i].in_use) { + /* need to enable this PTG, as it in not in use + * and not enabled (highest priority) + */ + status = ice_prof_tcam_ena_dis(hw, blk, true, + vsig, + &t->tcam[i], + chg); + if (status) + return status; + } + + /* keep track of used ptgs */ + set_bit(t->tcam[i].ptg, ptgs_used); + } + } + + return 0; +} + +/** + * ice_add_prof_id_vsig - add profile to VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG to which this profile is to be added + * @hdl: the profile handle indicating the profile to add + * @chg: the change list + */ +static enum ice_status +ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, + struct list_head *chg) +{ + /* Masks that ignore flags */ + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 }; + struct ice_prof_map *map; + struct ice_vsig_prof *t; + struct ice_chs_chg *p; + u16 i; + + /* Get the details on the profile specified by the handle ID */ + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + /* Error, if this VSIG already has this profile */ + if (ice_has_prof_vsig(hw, blk, vsig, hdl)) + return ICE_ERR_ALREADY_EXISTS; + + /* new VSIG profile structure */ + t = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*t), GFP_KERNEL); + if (!t) + return ICE_ERR_NO_MEMORY; + + t->profile_cookie = map->profile_cookie; + t->prof_id = map->prof_id; + t->tcam_count = map->ptg_cnt; + + /* create TCAM entries */ + for (i = 0; i < map->ptg_cnt; i++) { + enum ice_status status; + u16 tcam_idx; + + /* add TCAM to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + goto err_ice_add_prof_id_vsig; + + /* allocate the TCAM entry index */ + status = ice_alloc_tcam_ent(hw, blk, &tcam_idx); + if (status) { + devm_kfree(ice_hw_to_dev(hw), p); + goto err_ice_add_prof_id_vsig; + } + + t->tcam[i].ptg = map->ptg[i]; + t->tcam[i].prof_id = map->prof_id; + t->tcam[i].tcam_idx = tcam_idx; + t->tcam[i].in_use = true; + + p->type = ICE_TCAM_ADD; + p->add_tcam_idx = true; + p->prof_id = t->tcam[i].prof_id; + p->ptg = t->tcam[i].ptg; + p->vsig = vsig; + p->tcam_idx = t->tcam[i].tcam_idx; + + /* write the TCAM entry */ + status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx, + t->tcam[i].prof_id, + t->tcam[i].ptg, vsig, 0, 0, + vl_msk, dc_msk, nm_msk); + if (status) + goto err_ice_add_prof_id_vsig; + + /* log change */ + list_add(&p->list_entry, chg); + } + + /* add profile to VSIG */ + list_add(&t->list, + &hw->blk[blk].xlt2.vsig_tbl[(vsig & ICE_VSIG_IDX_M)].prop_lst); + + return 0; + +err_ice_add_prof_id_vsig: + /* let caller clean up the change list */ + devm_kfree(ice_hw_to_dev(hw), t); + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_create_prof_id_vsig - add a new VSIG with a single profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the initial VSI that will be in VSIG + * @hdl: the profile handle of the profile that will be added to the VSIG + * @chg: the change list + */ +static enum ice_status +ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + u16 new_vsig; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + new_vsig = ice_vsig_alloc(hw, blk); + if (!new_vsig) { + status = ICE_ERR_HW_TABLE; + goto err_ice_create_prof_id_vsig; + } + + status = ice_move_vsi(hw, blk, vsi, new_vsig, chg); + if (status) + goto err_ice_create_prof_id_vsig; + + status = ice_add_prof_id_vsig(hw, blk, new_vsig, hdl, chg); + if (status) + goto err_ice_create_prof_id_vsig; + + p->type = ICE_VSIG_ADD; + p->vsi = vsi; + p->orig_vsig = ICE_DEFAULT_VSIG; + p->vsig = new_vsig; + + list_add(&p->list_entry, chg); + + return 0; + +err_ice_create_prof_id_vsig: + /* let caller clean up the change list */ + devm_kfree(ice_hw_to_dev(hw), p); + return status; +} + +/** + * ice_create_vsig_from_lst - create a new VSIG with a list of profiles + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the initial VSI that will be in VSIG + * @lst: the list of profile that will be added to the VSIG + * @chg: the change list + */ +static enum ice_status +ice_create_vsig_from_lst(struct ice_hw *hw, enum ice_block blk, u16 vsi, + struct list_head *lst, struct list_head *chg) +{ + struct ice_vsig_prof *t; + enum ice_status status; + u16 vsig; + + vsig = ice_vsig_alloc(hw, blk); + if (!vsig) + return ICE_ERR_HW_TABLE; + + status = ice_move_vsi(hw, blk, vsi, vsig, chg); + if (status) + return status; + + list_for_each_entry(t, lst, list) { + status = ice_add_prof_id_vsig(hw, blk, vsig, t->profile_cookie, + chg); + if (status) + return status; + } + + return 0; +} + +/** + * ice_find_prof_vsig - find a VSIG with a specific profile handle + * @hw: pointer to the HW struct + * @blk: hardware block + * @hdl: the profile handle of the profile to search for + * @vsig: returns the VSIG with the matching profile + */ +static bool +ice_find_prof_vsig(struct ice_hw *hw, enum ice_block blk, u64 hdl, u16 *vsig) +{ + struct ice_vsig_prof *t; + enum ice_status status; + struct list_head lst; + + INIT_LIST_HEAD(&lst); + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return false; + + t->profile_cookie = hdl; + list_add(&t->list, &lst); + + status = ice_find_dup_props_vsig(hw, blk, &lst, vsig); + + list_del(&t->list); + kfree(t); + + return !status; +} + +/** + * ice_add_prof_id_flow - add profile flow + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI to enable with the profile specified by ID + * @hdl: profile handle + * + * Calling this function will update the hardware tables to enable the + * profile indicated by the ID parameter for the VSIs specified in the VSI + * array. Once successfully called, the flow will be enabled. + */ +enum ice_status +ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) +{ + struct ice_vsig_prof *tmp1, *del1; + struct ice_chs_chg *tmp, *del; + struct list_head union_lst; + enum ice_status status; + struct list_head chg; + u16 vsig; + + INIT_LIST_HEAD(&union_lst); + INIT_LIST_HEAD(&chg); + + /* Get profile */ + status = ice_get_prof(hw, blk, hdl, &chg); + if (status) + return status; + + /* determine if VSI is already part of a VSIG */ + status = ice_vsig_find_vsi(hw, blk, vsi, &vsig); + if (!status && vsig) { + bool only_vsi; + u16 or_vsig; + u16 ref; + + /* found in VSIG */ + or_vsig = vsig; + + /* make sure that there is no overlap/conflict between the new + * characteristics and the existing ones; we don't support that + * scenario + */ + if (ice_has_prof_vsig(hw, blk, vsig, hdl)) { + status = ICE_ERR_ALREADY_EXISTS; + goto err_ice_add_prof_id_flow; + } + + /* last VSI in the VSIG? */ + status = ice_vsig_get_ref(hw, blk, vsig, &ref); + if (status) + goto err_ice_add_prof_id_flow; + only_vsi = (ref == 1); + + /* create a union of the current profiles and the one being + * added + */ + status = ice_get_profs_vsig(hw, blk, vsig, &union_lst); + if (status) + goto err_ice_add_prof_id_flow; + + status = ice_add_prof_to_lst(hw, blk, &union_lst, hdl); + if (status) + goto err_ice_add_prof_id_flow; + + /* search for an existing VSIG with an exact charc match */ + status = ice_find_dup_props_vsig(hw, blk, &union_lst, &vsig); + if (!status) { + /* move VSI to the VSIG that matches */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* VSI has been moved out of or_vsig. If the or_vsig had + * only that VSI it is now empty and can be removed. + */ + if (only_vsi) { + status = ice_rem_vsig(hw, blk, or_vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } else if (only_vsi) { + /* If the original VSIG only contains one VSI, then it + * will be the requesting VSI. In this case the VSI is + * not sharing entries and we can simply add the new + * profile to the VSIG. + */ + status = ice_add_prof_id_vsig(hw, blk, vsig, hdl, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } else { + /* No match, so we need a new VSIG */ + status = ice_create_vsig_from_lst(hw, blk, vsi, + &union_lst, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } else { + /* need to find or add a VSIG */ + /* search for an existing VSIG with an exact charc match */ + if (ice_find_prof_vsig(hw, blk, hdl, &vsig)) { + /* found an exact match */ + /* add or move VSI to the VSIG that matches */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } else { + /* we did not find an exact match */ + /* we need to add a VSIG */ + status = ice_create_prof_id_vsig(hw, blk, vsi, hdl, + &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } + + /* update hardware */ + if (!status) + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_add_prof_id_flow: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + list_for_each_entry_safe(del1, tmp1, &union_lst, list) { + list_del(&del1->list); + devm_kfree(ice_hw_to_dev(hw), del1); + } + + return status; +} + +/** + * ice_rem_prof_from_list - remove a profile from list + * @hw: pointer to the HW struct + * @lst: list to remove the profile from + * @hdl: the profile handle indicating the profile to remove + */ +static enum ice_status +ice_rem_prof_from_list(struct ice_hw *hw, struct list_head *lst, u64 hdl) +{ + struct ice_vsig_prof *ent, *tmp; + + list_for_each_entry_safe(ent, tmp, lst, list) + if (ent->profile_cookie == hdl) { + list_del(&ent->list); + devm_kfree(ice_hw_to_dev(hw), ent); + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_rem_prof_id_flow - remove flow + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI from which to remove the profile specified by ID + * @hdl: profile tracking handle + * + * Calling this function will update the hardware tables to remove the + * profile indicated by the ID parameter for the VSIs specified in the VSI + * array. Once successfully called, the flow will be disabled. + */ +enum ice_status +ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) +{ + struct ice_vsig_prof *tmp1, *del1; + struct ice_chs_chg *tmp, *del; + struct list_head chg, copy; + enum ice_status status; + u16 vsig; + + INIT_LIST_HEAD(©); + INIT_LIST_HEAD(&chg); + + /* determine if VSI is already part of a VSIG */ + status = ice_vsig_find_vsi(hw, blk, vsi, &vsig); + if (!status && vsig) { + bool last_profile; + bool only_vsi; + u16 ref; + + /* found in VSIG */ + last_profile = ice_vsig_prof_id_count(hw, blk, vsig) == 1; + status = ice_vsig_get_ref(hw, blk, vsig, &ref); + if (status) + goto err_ice_rem_prof_id_flow; + only_vsi = (ref == 1); + + if (only_vsi) { + /* If the original VSIG only contains one reference, + * which will be the requesting VSI, then the VSI is not + * sharing entries and we can simply remove the specific + * characteristics from the VSIG. + */ + + if (last_profile) { + /* If there are no profiles left for this VSIG, + * then simply remove the the VSIG. + */ + status = ice_rem_vsig(hw, blk, vsig, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } else { + status = ice_rem_prof_id_vsig(hw, blk, vsig, + hdl, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, + &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } + + } else { + /* Make a copy of the VSIG's list of Profiles */ + status = ice_get_profs_vsig(hw, blk, vsig, ©); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Remove specified profile entry from the list */ + status = ice_rem_prof_from_list(hw, ©, hdl); + if (status) + goto err_ice_rem_prof_id_flow; + + if (list_empty(©)) { + status = ice_move_vsi(hw, blk, vsi, + ICE_DEFAULT_VSIG, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + } else if (!ice_find_dup_props_vsig(hw, blk, ©, + &vsig)) { + /* found an exact match */ + /* add or move VSI to the VSIG that matches */ + /* Search for a VSIG with a matching profile + * list + */ + + /* Found match, move VSI to the matching VSIG */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } else { + /* since no existing VSIG supports this + * characteristic pattern, we need to create a + * new VSIG and TCAM entries + */ + status = ice_create_vsig_from_lst(hw, blk, vsi, + ©, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, + &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } + } + } else { + status = ICE_ERR_DOES_NOT_EXIST; + } + + /* update hardware tables */ + if (!status) + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_rem_prof_id_flow: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + list_for_each_entry_safe(del1, tmp1, ©, list) { + list_del(&del1->list); + devm_kfree(ice_hw_to_dev(hw), del1); + } + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h new file mode 100644 index 000000000000..c7b5e1a6ea2b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_FLEX_PIPE_H_ +#define _ICE_FLEX_PIPE_H_ + +#include "ice_type.h" + +/* Package minimal version supported */ +#define ICE_PKG_SUPP_VER_MAJ 1 +#define ICE_PKG_SUPP_VER_MNR 3 + +/* Package format version */ +#define ICE_PKG_FMT_VER_MAJ 1 +#define ICE_PKG_FMT_VER_MNR 0 +#define ICE_PKG_FMT_VER_UPD 0 +#define ICE_PKG_FMT_VER_DFT 0 + +#define ICE_PKG_CNT 4 + +enum ice_status +ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], + struct ice_fv_word *es); +enum ice_status +ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); +enum ice_status +ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); +enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len); +enum ice_status +ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len); +enum ice_status ice_init_hw_tbls(struct ice_hw *hw); +void ice_free_seg(struct ice_hw *hw); +void ice_fill_blk_tbls(struct ice_hw *hw); +void ice_clear_hw_tbls(struct ice_hw *hw); +void ice_free_hw_tbls(struct ice_hw *hw); +enum ice_status +ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id); +#endif /* _ICE_FLEX_PIPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h new file mode 100644 index 000000000000..0fb3fe3ff3ea --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -0,0 +1,486 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_FLEX_TYPE_H_ +#define _ICE_FLEX_TYPE_H_ + +#define ICE_FV_OFFSET_INVAL 0x1FF + +/* Extraction Sequence (Field Vector) Table */ +struct ice_fv_word { + u8 prot_id; + u16 off; /* Offset within the protocol header */ + u8 resvrd; +} __packed; + +#define ICE_MAX_FV_WORDS 48 +struct ice_fv { + struct ice_fv_word ew[ICE_MAX_FV_WORDS]; +}; + +/* Package and segment headers and tables */ +struct ice_pkg_hdr { + struct ice_pkg_ver format_ver; + __le32 seg_count; + __le32 seg_offset[1]; +}; + +/* generic segment */ +struct ice_generic_seg_hdr { +#define SEGMENT_TYPE_METADATA 0x00000001 +#define SEGMENT_TYPE_ICE 0x00000010 + __le32 seg_type; + struct ice_pkg_ver seg_ver; + __le32 seg_size; + char seg_name[ICE_PKG_NAME_SIZE]; +}; + +/* ice specific segment */ + +union ice_device_id { + struct { + __le16 device_id; + __le16 vendor_id; + } dev_vend_id; + __le32 id; +}; + +struct ice_device_id_entry { + union ice_device_id device; + union ice_device_id sub_device; +}; + +struct ice_seg { + struct ice_generic_seg_hdr hdr; + __le32 device_table_count; + struct ice_device_id_entry device_table[1]; +}; + +struct ice_nvm_table { + __le32 table_count; + __le32 vers[1]; +}; + +struct ice_buf { +#define ICE_PKG_BUF_SIZE 4096 + u8 buf[ICE_PKG_BUF_SIZE]; +}; + +struct ice_buf_table { + __le32 buf_count; + struct ice_buf buf_array[1]; +}; + +/* global metadata specific segment */ +struct ice_global_metadata_seg { + struct ice_generic_seg_hdr hdr; + struct ice_pkg_ver pkg_ver; + __le32 track_id; + char pkg_name[ICE_PKG_NAME_SIZE]; +}; + +#define ICE_MIN_S_OFF 12 +#define ICE_MAX_S_OFF 4095 +#define ICE_MIN_S_SZ 1 +#define ICE_MAX_S_SZ 4084 + +/* section information */ +struct ice_section_entry { + __le32 type; + __le16 offset; + __le16 size; +}; + +#define ICE_MIN_S_COUNT 1 +#define ICE_MAX_S_COUNT 511 +#define ICE_MIN_S_DATA_END 12 +#define ICE_MAX_S_DATA_END 4096 + +#define ICE_METADATA_BUF 0x80000000 + +struct ice_buf_hdr { + __le16 section_count; + __le16 data_end; + struct ice_section_entry section_entry[1]; +}; + +#define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) ((ICE_PKG_BUF_SIZE - \ + sizeof(struct ice_buf_hdr) - (hd_sz)) / (ent_sz)) + +/* ice package section IDs */ +#define ICE_SID_XLT0_SW 10 +#define ICE_SID_XLT_KEY_BUILDER_SW 11 +#define ICE_SID_XLT1_SW 12 +#define ICE_SID_XLT2_SW 13 +#define ICE_SID_PROFID_TCAM_SW 14 +#define ICE_SID_PROFID_REDIR_SW 15 +#define ICE_SID_FLD_VEC_SW 16 +#define ICE_SID_CDID_KEY_BUILDER_SW 17 +#define ICE_SID_CDID_REDIR_SW 18 + +#define ICE_SID_XLT0_ACL 20 +#define ICE_SID_XLT_KEY_BUILDER_ACL 21 +#define ICE_SID_XLT1_ACL 22 +#define ICE_SID_XLT2_ACL 23 +#define ICE_SID_PROFID_TCAM_ACL 24 +#define ICE_SID_PROFID_REDIR_ACL 25 +#define ICE_SID_FLD_VEC_ACL 26 +#define ICE_SID_CDID_KEY_BUILDER_ACL 27 +#define ICE_SID_CDID_REDIR_ACL 28 + +#define ICE_SID_XLT0_FD 30 +#define ICE_SID_XLT_KEY_BUILDER_FD 31 +#define ICE_SID_XLT1_FD 32 +#define ICE_SID_XLT2_FD 33 +#define ICE_SID_PROFID_TCAM_FD 34 +#define ICE_SID_PROFID_REDIR_FD 35 +#define ICE_SID_FLD_VEC_FD 36 +#define ICE_SID_CDID_KEY_BUILDER_FD 37 +#define ICE_SID_CDID_REDIR_FD 38 + +#define ICE_SID_XLT0_RSS 40 +#define ICE_SID_XLT_KEY_BUILDER_RSS 41 +#define ICE_SID_XLT1_RSS 42 +#define ICE_SID_XLT2_RSS 43 +#define ICE_SID_PROFID_TCAM_RSS 44 +#define ICE_SID_PROFID_REDIR_RSS 45 +#define ICE_SID_FLD_VEC_RSS 46 +#define ICE_SID_CDID_KEY_BUILDER_RSS 47 +#define ICE_SID_CDID_REDIR_RSS 48 + +#define ICE_SID_RXPARSER_BOOST_TCAM 56 + +#define ICE_SID_XLT0_PE 80 +#define ICE_SID_XLT_KEY_BUILDER_PE 81 +#define ICE_SID_XLT1_PE 82 +#define ICE_SID_XLT2_PE 83 +#define ICE_SID_PROFID_TCAM_PE 84 +#define ICE_SID_PROFID_REDIR_PE 85 +#define ICE_SID_FLD_VEC_PE 86 +#define ICE_SID_CDID_KEY_BUILDER_PE 87 +#define ICE_SID_CDID_REDIR_PE 88 + +/* Label Metadata section IDs */ +#define ICE_SID_LBL_FIRST 0x80000010 +#define ICE_SID_LBL_RXPARSER_TMEM 0x80000018 +/* The following define MUST be updated to reflect the last label section ID */ +#define ICE_SID_LBL_LAST 0x80000038 + +enum ice_block { + ICE_BLK_SW = 0, + ICE_BLK_ACL, + ICE_BLK_FD, + ICE_BLK_RSS, + ICE_BLK_PE, + ICE_BLK_COUNT +}; + +enum ice_sect { + ICE_XLT0 = 0, + ICE_XLT_KB, + ICE_XLT1, + ICE_XLT2, + ICE_PROF_TCAM, + ICE_PROF_REDIR, + ICE_VEC_TBL, + ICE_CDID_KB, + ICE_CDID_REDIR, + ICE_SECT_COUNT +}; + +/* package labels */ +struct ice_label { + __le16 value; +#define ICE_PKG_LABEL_SIZE 64 + char name[ICE_PKG_LABEL_SIZE]; +}; + +struct ice_label_section { + __le16 count; + struct ice_label label[1]; +}; + +#define ICE_MAX_LABELS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \ + sizeof(struct ice_label_section) - sizeof(struct ice_label), \ + sizeof(struct ice_label)) + +struct ice_sw_fv_section { + __le16 count; + __le16 base_offset; + struct ice_fv fv[1]; +}; + +/* The BOOST TCAM stores the match packet header in reverse order, meaning + * the fields are reversed; in addition, this means that the normally big endian + * fields of the packet are now little endian. + */ +struct ice_boost_key_value { +#define ICE_BOOST_REMAINING_HV_KEY 15 + u8 remaining_hv_key[ICE_BOOST_REMAINING_HV_KEY]; + __le16 hv_dst_port_key; + __le16 hv_src_port_key; + u8 tcam_search_key; +} __packed; + +struct ice_boost_key { + struct ice_boost_key_value key; + struct ice_boost_key_value key2; +}; + +/* package Boost TCAM entry */ +struct ice_boost_tcam_entry { + __le16 addr; + __le16 reserved; + /* break up the 40 bytes of key into different fields */ + struct ice_boost_key key; + u8 boost_hit_index_group; + /* The following contains bitfields which are not on byte boundaries. + * These fields are currently unused by driver software. + */ +#define ICE_BOOST_BIT_FIELDS 43 + u8 bit_fields[ICE_BOOST_BIT_FIELDS]; +}; + +struct ice_boost_tcam_section { + __le16 count; + __le16 reserved; + struct ice_boost_tcam_entry tcam[1]; +}; + +#define ICE_MAX_BST_TCAMS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \ + sizeof(struct ice_boost_tcam_section) - \ + sizeof(struct ice_boost_tcam_entry), \ + sizeof(struct ice_boost_tcam_entry)) + +struct ice_xlt1_section { + __le16 count; + __le16 offset; + u8 value[1]; +} __packed; + +struct ice_xlt2_section { + __le16 count; + __le16 offset; + __le16 value[1]; +}; + +struct ice_prof_redir_section { + __le16 count; + __le16 offset; + u8 redir_value[1]; +}; + +/* package buffer building */ + +struct ice_buf_build { + struct ice_buf buf; + u16 reserved_section_table_entries; +}; + +struct ice_pkg_enum { + struct ice_buf_table *buf_table; + u32 buf_idx; + + u32 type; + struct ice_buf_hdr *buf; + u32 sect_idx; + void *sect; + u32 sect_type; + + u32 entry_idx; + void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset); +}; + +struct ice_pkg_es { + __le16 count; + __le16 offset; + struct ice_fv_word es[1]; +}; + +struct ice_es { + u32 sid; + u16 count; + u16 fvw; + u16 *ref_count; + struct list_head prof_map; + struct ice_fv_word *t; + struct mutex prof_map_lock; /* protect access to profiles list */ + u8 *written; + u8 reverse; /* set to true to reverse FV order */ +}; + +/* PTYPE Group management */ + +/* Note: XLT1 table takes 13-bit as input, and results in an 8-bit packet type + * group (PTG) ID as output. + * + * Note: PTG 0 is the default packet type group and it is assumed that all PTYPE + * are a part of this group until moved to a new PTG. + */ +#define ICE_DEFAULT_PTG 0 + +struct ice_ptg_entry { + struct ice_ptg_ptype *first_ptype; + u8 in_use; +}; + +struct ice_ptg_ptype { + struct ice_ptg_ptype *next_ptype; + u8 ptg; +}; + +#define ICE_MAX_TCAM_PER_PROFILE 32 +#define ICE_MAX_PTG_PER_PROFILE 32 + +struct ice_prof_map { + struct list_head list; + u64 profile_cookie; + u64 context; + u8 prof_id; + u8 ptg_cnt; + u8 ptg[ICE_MAX_PTG_PER_PROFILE]; +}; + +#define ICE_INVALID_TCAM 0xFFFF + +struct ice_tcam_inf { + u16 tcam_idx; + u8 ptg; + u8 prof_id; + u8 in_use; +}; + +struct ice_vsig_prof { + struct list_head list; + u64 profile_cookie; + u8 prof_id; + u8 tcam_count; + struct ice_tcam_inf tcam[ICE_MAX_TCAM_PER_PROFILE]; +}; + +struct ice_vsig_entry { + struct list_head prop_lst; + struct ice_vsig_vsi *first_vsi; + u8 in_use; +}; + +struct ice_vsig_vsi { + struct ice_vsig_vsi *next_vsi; + u32 prop_mask; + u16 changed; + u16 vsig; +}; + +#define ICE_XLT1_CNT 1024 +#define ICE_MAX_PTGS 256 + +/* XLT1 Table */ +struct ice_xlt1 { + struct ice_ptg_entry *ptg_tbl; + struct ice_ptg_ptype *ptypes; + u8 *t; + u32 sid; + u16 count; +}; + +#define ICE_XLT2_CNT 768 +#define ICE_MAX_VSIGS 768 + +/* VSIG bit layout: + * [0:12]: incremental VSIG index 1 to ICE_MAX_VSIGS + * [13:15]: PF number of device + */ +#define ICE_VSIG_IDX_M (0x1FFF) +#define ICE_PF_NUM_S 13 +#define ICE_PF_NUM_M (0x07 << ICE_PF_NUM_S) +#define ICE_VSIG_VALUE(vsig, pf_id) \ + (u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \ + (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)) +#define ICE_DEFAULT_VSIG 0 + +/* XLT2 Table */ +struct ice_xlt2 { + struct ice_vsig_entry *vsig_tbl; + struct ice_vsig_vsi *vsis; + u16 *t; + u32 sid; + u16 count; +}; + +/* Profile ID Management */ +struct ice_prof_id_key { + __le16 flags; + u8 xlt1; + __le16 xlt2_cdid; +} __packed; + +/* Keys are made up of two values, each one-half the size of the key. + * For TCAM, the entire key is 80 bits wide (or 2, 40-bit wide values) + */ +#define ICE_TCAM_KEY_VAL_SZ 5 +#define ICE_TCAM_KEY_SZ (2 * ICE_TCAM_KEY_VAL_SZ) + +struct ice_prof_tcam_entry { + __le16 addr; + u8 key[ICE_TCAM_KEY_SZ]; + u8 prof_id; +} __packed; + +struct ice_prof_id_section { + __le16 count; + struct ice_prof_tcam_entry entry[1]; +} __packed; + +struct ice_prof_tcam { + u32 sid; + u16 count; + u16 max_prof_id; + struct ice_prof_tcam_entry *t; + u8 cdid_bits; /* # CDID bits to use in key, 0, 2, 4, or 8 */ +}; + +struct ice_prof_redir { + u8 *t; + u32 sid; + u16 count; +}; + +/* Tables per block */ +struct ice_blk_info { + struct ice_xlt1 xlt1; + struct ice_xlt2 xlt2; + struct ice_prof_tcam prof; + struct ice_prof_redir prof_redir; + struct ice_es es; + u8 overwrite; /* set to true to allow overwrite of table entries */ + u8 is_list_init; +}; + +enum ice_chg_type { + ICE_TCAM_NONE = 0, + ICE_PTG_ES_ADD, + ICE_TCAM_ADD, + ICE_VSIG_ADD, + ICE_VSIG_REM, + ICE_VSI_MOVE, +}; + +struct ice_chs_chg { + struct list_head list_entry; + enum ice_chg_type type; + + u8 add_ptg; + u8 add_vsig; + u8 add_tcam_idx; + u8 add_prof; + u16 ptype; + u8 ptg; + u8 prof_id; + u16 vsi; + u16 vsig; + u16 orig_vsig; + u16 tcam_idx; +}; + +#define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT +#endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c new file mode 100644 index 000000000000..a05ceb59863b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -0,0 +1,1275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_flow.h" + +/* Describe properties of a protocol header field */ +struct ice_flow_field_info { + enum ice_flow_seg_hdr hdr; + s16 off; /* Offset from start of a protocol header, in bits */ + u16 size; /* Size of fields in bits */ +}; + +#define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \ + .hdr = _hdr, \ + .off = (_offset_bytes) * BITS_PER_BYTE, \ + .size = (_size_bytes) * BITS_PER_BYTE, \ +} + +/* Table containing properties of supported protocol header fields */ +static const +struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { + /* IPv4 / IPv6 */ + /* ICE_FLOW_FIELD_IDX_IPV4_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_IPV4_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 16, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_IPV6_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)), + /* ICE_FLOW_FIELD_IDX_IPV6_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)), + /* Transport */ + /* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_TCP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_UDP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_UDP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 2, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), + +}; + +/* Bitmaps indicating relevant packet types for a particular protocol header + * + * Packet types for packets with an Outer/First/Single IPv4 header + */ +static const u32 ice_ptypes_ipv4_ofos[] = { + 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv4 header */ +static const u32 ice_ptypes_ipv4_il[] = { + 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B, + 0x0000000E, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv6 header */ +static const u32 ice_ptypes_ipv6_ofos[] = { + 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv6 header */ +static const u32 ice_ptypes_ipv6_il[] = { + 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000, + 0x00000770, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* UDP Packet types for non-tunneled packets or tunneled + * packets with inner UDP. + */ +static const u32 ice_ptypes_udp_il[] = { + 0x81000000, 0x20204040, 0x04000010, 0x80810102, + 0x00000040, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last TCP header */ +static const u32 ice_ptypes_tcp_il[] = { + 0x04000000, 0x80810102, 0x10000040, 0x02040408, + 0x00000102, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last SCTP header */ +static const u32 ice_ptypes_sctp_il[] = { + 0x08000000, 0x01020204, 0x20000081, 0x04080810, + 0x00000204, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Manage parameters and info. used during the creation of a flow profile */ +struct ice_flow_prof_params { + enum ice_block blk; + u16 entry_length; /* # of bytes formatted entry will require */ + u8 es_cnt; + struct ice_flow_prof *prof; + + /* For ACL, the es[0] will have the data of ICE_RX_MDID_PKT_FLAGS_15_0 + * This will give us the direction flags. + */ + struct ice_fv_word es[ICE_MAX_FV_WORDS]; + DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX); +}; + +#define ICE_FLOW_SEG_HDRS_L3_MASK \ + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) +#define ICE_FLOW_SEG_HDRS_L4_MASK \ + (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) + +/** + * ice_flow_val_hdrs - validates packet segments for valid protocol headers + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + */ +static enum ice_status +ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt) +{ + u8 i; + + for (i = 0; i < segs_cnt; i++) { + /* Multiple L3 headers */ + if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK && + !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK)) + return ICE_ERR_PARAM; + + /* Multiple L4 headers */ + if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK && + !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) + return ICE_ERR_PARAM; + } + + return 0; +} + +/** + * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments + * @params: information about the flow to be processed + * + * This function identifies the packet types associated with the protocol + * headers being present in packet segments of the specified flow profile. + */ +static enum ice_status +ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) +{ + struct ice_flow_prof *prof; + u8 i; + + memset(params->ptypes, 0xff, sizeof(params->ptypes)); + + prof = params->prof; + + for (i = 0; i < params->prof->segs_cnt; i++) { + const unsigned long *src; + u32 hdrs; + + hdrs = prof->segs[i].hdrs; + + if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { + src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos : + (const unsigned long *)ice_ptypes_ipv4_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { + src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos : + (const unsigned long *)ice_ptypes_ipv6_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + + if (hdrs & ICE_FLOW_SEG_HDR_UDP) { + src = (const unsigned long *)ice_ptypes_udp_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) { + bitmap_and(params->ptypes, params->ptypes, + (const unsigned long *)ice_ptypes_tcp_il, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) { + src = (const unsigned long *)ice_ptypes_sctp_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + } + + return 0; +} + +/** + * ice_flow_xtract_fld - Create an extraction sequence entry for the given field + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + * @seg: packet segment index of the field to be extracted + * @fld: ID of field to be extracted + * + * This function determines the protocol ID, offset, and size of the given + * field. It then allocates one or more extraction sequence entries for the + * given field, and fill the entries with protocol ID and offset information. + */ +static enum ice_status +ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, + u8 seg, enum ice_flow_field fld) +{ + enum ice_prot_id prot_id = ICE_PROT_ID_INVAL; + u8 fv_words = hw->blk[params->blk].es.fvw; + struct ice_flow_fld_info *flds; + u16 cnt, ese_bits, i; + u16 off; + + flds = params->prof->segs[seg].fields; + + switch (fld) { + case ICE_FLOW_FIELD_IDX_IPV4_SA: + case ICE_FLOW_FIELD_IDX_IPV4_DA: + prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL; + break; + case ICE_FLOW_FIELD_IDX_IPV6_SA: + case ICE_FLOW_FIELD_IDX_IPV6_DA: + prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL; + break; + case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_TCP_DST_PORT: + prot_id = ICE_PROT_TCP_IL; + break; + case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_UDP_DST_PORT: + prot_id = ICE_PROT_UDP_IL_OR_S; + break; + case ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT: + prot_id = ICE_PROT_SCTP_IL; + break; + default: + return ICE_ERR_NOT_IMPL; + } + + /* Each extraction sequence entry is a word in size, and extracts a + * word-aligned offset from a protocol header. + */ + ese_bits = ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE; + + flds[fld].xtrct.prot_id = prot_id; + flds[fld].xtrct.off = (ice_flds_info[fld].off / ese_bits) * + ICE_FLOW_FV_EXTRACT_SZ; + flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits); + flds[fld].xtrct.idx = params->es_cnt; + + /* Adjust the next field-entry index after accommodating the number of + * entries this field consumes + */ + cnt = DIV_ROUND_UP(flds[fld].xtrct.disp + ice_flds_info[fld].size, + ese_bits); + + /* Fill in the extraction sequence entries needed for this field */ + off = flds[fld].xtrct.off; + for (i = 0; i < cnt; i++) { + u8 idx; + + /* Make sure the number of extraction sequence required + * does not exceed the block's capability + */ + if (params->es_cnt >= fv_words) + return ICE_ERR_MAX_LIMIT; + + /* some blocks require a reversed field vector layout */ + if (hw->blk[params->blk].es.reverse) + idx = fv_words - params->es_cnt - 1; + else + idx = params->es_cnt; + + params->es[idx].prot_id = prot_id; + params->es[idx].off = off; + params->es_cnt++; + + off += ICE_FLOW_FV_EXTRACT_SZ; + } + + return 0; +} + +/** + * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + * + * This function iterates through all matched fields in the given segments, and + * creates an extraction sequence for the fields. + */ +static enum ice_status +ice_flow_create_xtrct_seq(struct ice_hw *hw, + struct ice_flow_prof_params *params) +{ + struct ice_flow_prof *prof = params->prof; + enum ice_status status = 0; + u8 i; + + for (i = 0; i < prof->segs_cnt; i++) { + u8 j; + + for_each_set_bit(j, (unsigned long *)&prof->segs[i].match, + ICE_FLOW_FIELD_IDX_MAX) { + status = ice_flow_xtract_fld(hw, params, i, + (enum ice_flow_field)j); + if (status) + return status; + } + } + + return status; +} + +/** + * ice_flow_proc_segs - process all packet segments associated with a profile + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + */ +static enum ice_status +ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params) +{ + enum ice_status status; + + status = ice_flow_proc_seg_hdrs(params); + if (status) + return status; + + status = ice_flow_create_xtrct_seq(hw, params); + if (status) + return status; + + switch (params->blk) { + case ICE_BLK_RSS: + /* Only header information is provided for RSS configuration. + * No further processing is needed. + */ + status = 0; + break; + default: + return ICE_ERR_NOT_IMPL; + } + + return status; +} + +#define ICE_FLOW_FIND_PROF_CHK_FLDS 0x00000001 +#define ICE_FLOW_FIND_PROF_CHK_VSI 0x00000002 +#define ICE_FLOW_FIND_PROF_NOT_CHK_DIR 0x00000004 + +/** + * ice_flow_find_prof_conds - Find a profile matching headers and conditions + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @vsi_handle: software VSI handle to check VSI (ICE_FLOW_FIND_PROF_CHK_VSI) + * @conds: additional conditions to be checked (ICE_FLOW_FIND_PROF_CHK_*) + */ +static struct ice_flow_prof * +ice_flow_find_prof_conds(struct ice_hw *hw, enum ice_block blk, + enum ice_flow_dir dir, struct ice_flow_seg_info *segs, + u8 segs_cnt, u16 vsi_handle, u32 conds) +{ + struct ice_flow_prof *p, *prof = NULL; + + mutex_lock(&hw->fl_profs_locks[blk]); + list_for_each_entry(p, &hw->fl_profs[blk], l_entry) + if ((p->dir == dir || conds & ICE_FLOW_FIND_PROF_NOT_CHK_DIR) && + segs_cnt && segs_cnt == p->segs_cnt) { + u8 i; + + /* Check for profile-VSI association if specified */ + if ((conds & ICE_FLOW_FIND_PROF_CHK_VSI) && + ice_is_vsi_valid(hw, vsi_handle) && + !test_bit(vsi_handle, p->vsis)) + continue; + + /* Protocol headers must be checked. Matched fields are + * checked if specified. + */ + for (i = 0; i < segs_cnt; i++) + if (segs[i].hdrs != p->segs[i].hdrs || + ((conds & ICE_FLOW_FIND_PROF_CHK_FLDS) && + segs[i].match != p->segs[i].match)) + break; + + /* A match is found if all segments are matched */ + if (i == segs_cnt) { + prof = p; + break; + } + } + mutex_unlock(&hw->fl_profs_locks[blk]); + + return prof; +} + +/** + * ice_flow_find_prof_id - Look up a profile with given profile ID + * @hw: pointer to the HW struct + * @blk: classification stage + * @prof_id: unique ID to identify this flow profile + */ +static struct ice_flow_prof * +ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id) +{ + struct ice_flow_prof *p; + + list_for_each_entry(p, &hw->fl_profs[blk], l_entry) + if (p->id == prof_id) + return p; + + return NULL; +} + +/** + * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @prof_id: unique ID to identify this flow profile + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @prof: stores the returned flow profile added + * + * Assumption: the caller has acquired the lock to the profile list + */ +static enum ice_status +ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk, + enum ice_flow_dir dir, u64 prof_id, + struct ice_flow_seg_info *segs, u8 segs_cnt, + struct ice_flow_prof **prof) +{ + struct ice_flow_prof_params params; + enum ice_status status; + u8 i; + + if (!prof) + return ICE_ERR_BAD_PTR; + + memset(¶ms, 0, sizeof(params)); + params.prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*params.prof), + GFP_KERNEL); + if (!params.prof) + return ICE_ERR_NO_MEMORY; + + /* initialize extraction sequence to all invalid (0xff) */ + for (i = 0; i < ICE_MAX_FV_WORDS; i++) { + params.es[i].prot_id = ICE_PROT_INVALID; + params.es[i].off = ICE_FV_OFFSET_INVAL; + } + + params.blk = blk; + params.prof->id = prof_id; + params.prof->dir = dir; + params.prof->segs_cnt = segs_cnt; + + /* Make a copy of the segments that need to be persistent in the flow + * profile instance + */ + for (i = 0; i < segs_cnt; i++) + memcpy(¶ms.prof->segs[i], &segs[i], sizeof(*segs)); + + status = ice_flow_proc_segs(hw, ¶ms); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, + "Error processing a flow's packet segments\n"); + goto out; + } + + /* Add a HW profile for this flow profile */ + status = ice_add_prof(hw, blk, prof_id, (u8 *)params.ptypes, params.es); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n"); + goto out; + } + + INIT_LIST_HEAD(¶ms.prof->entries); + mutex_init(¶ms.prof->entries_lock); + *prof = params.prof; + +out: + if (status) + devm_kfree(ice_hw_to_dev(hw), params.prof); + + return status; +} + +/** + * ice_flow_rem_prof_sync - remove a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile to remove + * + * Assumption: the caller has acquired the lock to the profile list + */ +static enum ice_status +ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof) +{ + enum ice_status status; + + /* Remove all hardware profiles associated with this flow profile */ + status = ice_rem_prof(hw, blk, prof->id); + if (!status) { + list_del(&prof->l_entry); + mutex_destroy(&prof->entries_lock); + devm_kfree(ice_hw_to_dev(hw), prof); + } + + return status; +} + +/** + * ice_flow_assoc_prof - associate a VSI with a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile + * @vsi_handle: software VSI handle + * + * Assumption: the caller has acquired the lock to the profile list + * and the software VSI handle has been validated + */ +static enum ice_status +ice_flow_assoc_prof(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof, u16 vsi_handle) +{ + enum ice_status status = 0; + + if (!test_bit(vsi_handle, prof->vsis)) { + status = ice_add_prof_id_flow(hw, blk, + ice_get_hw_vsi_num(hw, + vsi_handle), + prof->id); + if (!status) + set_bit(vsi_handle, prof->vsis); + else + ice_debug(hw, ICE_DBG_FLOW, + "HW profile add failed, %d\n", + status); + } + + return status; +} + +/** + * ice_flow_disassoc_prof - disassociate a VSI from a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile + * @vsi_handle: software VSI handle + * + * Assumption: the caller has acquired the lock to the profile list + * and the software VSI handle has been validated + */ +static enum ice_status +ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof, u16 vsi_handle) +{ + enum ice_status status = 0; + + if (test_bit(vsi_handle, prof->vsis)) { + status = ice_rem_prof_id_flow(hw, blk, + ice_get_hw_vsi_num(hw, + vsi_handle), + prof->id); + if (!status) + clear_bit(vsi_handle, prof->vsis); + else + ice_debug(hw, ICE_DBG_FLOW, + "HW profile remove failed, %d\n", + status); + } + + return status; +} + +/** + * ice_flow_add_prof - Add a flow profile for packet segments and matched fields + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @prof_id: unique ID to identify this flow profile + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @prof: stores the returned flow profile added + */ +static enum ice_status +ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir, + u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt, + struct ice_flow_prof **prof) +{ + enum ice_status status; + + if (segs_cnt > ICE_FLOW_SEG_MAX) + return ICE_ERR_MAX_LIMIT; + + if (!segs_cnt) + return ICE_ERR_PARAM; + + if (!segs) + return ICE_ERR_BAD_PTR; + + status = ice_flow_val_hdrs(segs, segs_cnt); + if (status) + return status; + + mutex_lock(&hw->fl_profs_locks[blk]); + + status = ice_flow_add_prof_sync(hw, blk, dir, prof_id, segs, segs_cnt, + prof); + if (!status) + list_add(&(*prof)->l_entry, &hw->fl_profs[blk]); + + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_flow_rem_prof - Remove a flow profile and all entries associated with it + * @hw: pointer to the HW struct + * @blk: the block for which the flow profile is to be removed + * @prof_id: unique ID of the flow profile to be removed + */ +static enum ice_status +ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id) +{ + struct ice_flow_prof *prof; + enum ice_status status; + + mutex_lock(&hw->fl_profs_locks[blk]); + + prof = ice_flow_find_prof_id(hw, blk, prof_id); + if (!prof) { + status = ICE_ERR_DOES_NOT_EXIST; + goto out; + } + + /* prof becomes invalid after the call */ + status = ice_flow_rem_prof_sync(hw, blk, prof); + +out: + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer + * @seg: packet segment the field being set belongs to + * @fld: field to be set + * @type: type of the field + * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from + * entry's input buffer + * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's + * input buffer + * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from + * entry's input buffer + * + * This helper function stores information of a field being matched, including + * the type of the field and the locations of the value to match, the mask, and + * and the upper-bound value in the start of the input buffer for a flow entry. + * This function should only be used for fixed-size data structures. + * + * This function also opportunistically determines the protocol headers to be + * present based on the fields being set. Some fields cannot be used alone to + * determine the protocol headers present. Sometimes, fields for particular + * protocol headers are not matched. In those cases, the protocol headers + * must be explicitly set. + */ +static void +ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld, + enum ice_flow_fld_match_type type, u16 val_loc, + u16 mask_loc, u16 last_loc) +{ + u64 bit = BIT_ULL(fld); + + seg->match |= bit; + if (type == ICE_FLOW_FLD_TYPE_RANGE) + seg->range |= bit; + + seg->fields[fld].type = type; + seg->fields[fld].src.val = val_loc; + seg->fields[fld].src.mask = mask_loc; + seg->fields[fld].src.last = last_loc; + + ICE_FLOW_SET_HDRS(seg, ice_flds_info[fld].hdr); +} + +/** + * ice_flow_set_fld - specifies locations of field from entry's input buffer + * @seg: packet segment the field being set belongs to + * @fld: field to be set + * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from + * entry's input buffer + * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's + * input buffer + * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from + * entry's input buffer + * @range: indicate if field being matched is to be in a range + * + * This function specifies the locations, in the form of byte offsets from the + * start of the input buffer for a flow entry, from where the value to match, + * the mask value, and upper value can be extracted. These locations are then + * stored in the flow profile. When adding a flow entry associated with the + * flow profile, these locations will be used to quickly extract the values and + * create the content of a match entry. This function should only be used for + * fixed-size data structures. + */ +static void +ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld, + u16 val_loc, u16 mask_loc, u16 last_loc, bool range) +{ + enum ice_flow_fld_match_type t = range ? + ICE_FLOW_FLD_TYPE_RANGE : ICE_FLOW_FLD_TYPE_REG; + + ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc); +} + +#define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \ + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) + +#define ICE_FLOW_RSS_SEG_HDR_L4_MASKS \ + (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) + +#define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \ + (ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \ + ICE_FLOW_RSS_SEG_HDR_L4_MASKS) + +/** + * ice_flow_set_rss_seg_info - setup packet segments for RSS + * @segs: pointer to the flow field segment(s) + * @hash_fields: fields to be hashed on for the segment(s) + * @flow_hdr: protocol header fields within a packet segment + * + * Helper function to extract fields from hash bitmap and use flow + * header value to set flow field segment for further use in flow + * profile entry or removal. + */ +static enum ice_status +ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields, + u32 flow_hdr) +{ + u64 val; + u8 i; + + for_each_set_bit(i, (unsigned long *)&hash_fields, + ICE_FLOW_FIELD_IDX_MAX) + ice_flow_set_fld(segs, (enum ice_flow_field)i, + ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + + ICE_FLOW_SET_HDRS(segs, flow_hdr); + + if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS) + return ICE_ERR_PARAM; + + val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS); + if (val && !is_power_of_2(val)) + return ICE_ERR_CFG; + + val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L4_MASKS); + if (val && !is_power_of_2(val)) + return ICE_ERR_CFG; + + return 0; +} + +/** + * ice_rem_vsi_rss_list - remove VSI from RSS list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * + * Remove the VSI from all RSS configurations in the list. + */ +void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle) +{ + struct ice_rss_cfg *r, *tmp; + + if (list_empty(&hw->rss_list_head)) + return; + + mutex_lock(&hw->rss_locks); + list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry) + if (test_and_clear_bit(vsi_handle, r->vsis)) + if (bitmap_empty(r->vsis, ICE_MAX_VSI)) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + mutex_unlock(&hw->rss_locks); +} + +/** + * ice_rem_vsi_rss_cfg - remove RSS configurations associated with VSI + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * + * This function will iterate through all flow profiles and disassociate + * the VSI from that profile. If the flow profile has no VSIs it will + * be removed. + */ +enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle) +{ + const enum ice_block blk = ICE_BLK_RSS; + struct ice_flow_prof *p, *t; + enum ice_status status = 0; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + if (list_empty(&hw->fl_profs[blk])) + return 0; + + mutex_lock(&hw->fl_profs_locks[blk]); + list_for_each_entry_safe(p, t, &hw->fl_profs[blk], l_entry) + if (test_bit(vsi_handle, p->vsis)) { + status = ice_flow_disassoc_prof(hw, blk, p, vsi_handle); + if (status) + break; + + if (bitmap_empty(p->vsis, ICE_MAX_VSI)) { + status = ice_flow_rem_prof_sync(hw, blk, p); + if (status) + break; + } + } + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_rem_rss_list - remove RSS configuration from list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @prof: pointer to flow profile + * + * Assumption: lock has already been acquired for RSS list + */ +static void +ice_rem_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) +{ + struct ice_rss_cfg *r, *tmp; + + /* Search for RSS hash fields associated to the VSI that match the + * hash configurations associated to the flow profile. If found + * remove from the RSS entry list of the VSI context and delete entry. + */ + list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry) + if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match && + r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) { + clear_bit(vsi_handle, r->vsis); + if (bitmap_empty(r->vsis, ICE_MAX_VSI)) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + return; + } +} + +/** + * ice_add_rss_list - add RSS configuration to list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @prof: pointer to flow profile + * + * Assumption: lock has already been acquired for RSS list + */ +static enum ice_status +ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) +{ + struct ice_rss_cfg *r, *rss_cfg; + + list_for_each_entry(r, &hw->rss_list_head, l_entry) + if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match && + r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) { + set_bit(vsi_handle, r->vsis); + return 0; + } + + rss_cfg = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rss_cfg), + GFP_KERNEL); + if (!rss_cfg) + return ICE_ERR_NO_MEMORY; + + rss_cfg->hashed_flds = prof->segs[prof->segs_cnt - 1].match; + rss_cfg->packet_hdr = prof->segs[prof->segs_cnt - 1].hdrs; + set_bit(vsi_handle, rss_cfg->vsis); + + list_add_tail(&rss_cfg->l_entry, &hw->rss_list_head); + + return 0; +} + +#define ICE_FLOW_PROF_HASH_S 0 +#define ICE_FLOW_PROF_HASH_M (0xFFFFFFFFULL << ICE_FLOW_PROF_HASH_S) +#define ICE_FLOW_PROF_HDR_S 32 +#define ICE_FLOW_PROF_HDR_M (0x3FFFFFFFULL << ICE_FLOW_PROF_HDR_S) +#define ICE_FLOW_PROF_ENCAP_S 63 +#define ICE_FLOW_PROF_ENCAP_M (BIT_ULL(ICE_FLOW_PROF_ENCAP_S)) + +#define ICE_RSS_OUTER_HEADERS 1 + +/* Flow profile ID format: + * [0:31] - Packet match fields + * [32:62] - Protocol header + * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled + */ +#define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \ + (u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ + (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ + ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)) + +/** + * ice_add_rss_cfg_sync - add an RSS configuration + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure + * @addl_hdrs: protocol header fields + * @segs_cnt: packet segment count + * + * Assumption: lock has already been acquired for RSS list + */ +static enum ice_status +ice_add_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs, u8 segs_cnt) +{ + const enum ice_block blk = ICE_BLK_RSS; + struct ice_flow_prof *prof = NULL; + struct ice_flow_seg_info *segs; + enum ice_status status; + + if (!segs_cnt || segs_cnt > ICE_FLOW_SEG_MAX) + return ICE_ERR_PARAM; + + segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL); + if (!segs) + return ICE_ERR_NO_MEMORY; + + /* Construct the packet segment info from the hashed fields */ + status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds, + addl_hdrs); + if (status) + goto exit; + + /* Search for a flow profile that has matching headers, hash fields + * and has the input VSI associated to it. If found, no further + * operations required and exit. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, + ICE_FLOW_FIND_PROF_CHK_FLDS | + ICE_FLOW_FIND_PROF_CHK_VSI); + if (prof) + goto exit; + + /* Check if a flow profile exists with the same protocol headers and + * associated with the input VSI. If so disassociate the VSI from + * this profile. The VSI will be added to a new profile created with + * the protocol header and new hash field configuration. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, ICE_FLOW_FIND_PROF_CHK_VSI); + if (prof) { + status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle); + if (!status) + ice_rem_rss_list(hw, vsi_handle, prof); + else + goto exit; + + /* Remove profile if it has no VSIs associated */ + if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) { + status = ice_flow_rem_prof(hw, blk, prof->id); + if (status) + goto exit; + } + } + + /* Search for a profile that has same match fields only. If this + * exists then associate the VSI to this profile. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, + ICE_FLOW_FIND_PROF_CHK_FLDS); + if (prof) { + status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle); + if (!status) + status = ice_add_rss_list(hw, vsi_handle, prof); + goto exit; + } + + /* Create a new flow profile with generated profile and packet + * segment information. + */ + status = ice_flow_add_prof(hw, blk, ICE_FLOW_RX, + ICE_FLOW_GEN_PROFID(hashed_flds, + segs[segs_cnt - 1].hdrs, + segs_cnt), + segs, segs_cnt, &prof); + if (status) + goto exit; + + status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle); + /* If association to a new flow profile failed then this profile can + * be removed. + */ + if (status) { + ice_flow_rem_prof(hw, blk, prof->id); + goto exit; + } + + status = ice_add_rss_list(hw, vsi_handle, prof); + +exit: + kfree(segs); + return status; +} + +/** + * ice_add_rss_cfg - add an RSS configuration with specified hashed fields + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure + * @addl_hdrs: protocol header fields + * + * This function will generate a flow profile based on fields associated with + * the input fields to hash on, the flow type and use the VSI number to add + * a flow entry to the profile. + */ +enum ice_status +ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs) +{ + enum ice_status status; + + if (hashed_flds == ICE_HASH_INVALID || + !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + mutex_lock(&hw->rss_locks); + status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs, + ICE_RSS_OUTER_HEADERS); + mutex_unlock(&hw->rss_locks); + + return status; +} + +/* Mapping of AVF hash bit fields to an L3-L4 hash combination. + * As the ice_flow_avf_hdr_field represent individual bit shifts in a hash, + * convert its values to their appropriate flow L3, L4 values. + */ +#define ICE_FLOW_AVF_RSS_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4)) +#define ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP)) +#define ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP)) +#define ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS \ + (ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS | \ + ICE_FLOW_AVF_RSS_IPV4_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) + +#define ICE_FLOW_AVF_RSS_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6)) +#define ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP)) +#define ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP)) +#define ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS \ + (ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS | \ + ICE_FLOW_AVF_RSS_IPV6_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) + +/** + * ice_add_avf_rss_cfg - add an RSS configuration for AVF driver + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @avf_hash: hash bit fields (ICE_AVF_FLOW_FIELD_*) to configure + * + * This function will take the hash bitmap provided by the AVF driver via a + * message, convert it to ICE-compatible values, and configure RSS flow + * profiles. + */ +enum ice_status +ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 avf_hash) +{ + enum ice_status status = 0; + u64 hash_flds; + + if (avf_hash == ICE_AVF_FLOW_FIELD_INVALID || + !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + /* Make sure no unsupported bits are specified */ + if (avf_hash & ~(ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS | + ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS)) + return ICE_ERR_CFG; + + hash_flds = avf_hash; + + /* Always create an L3 RSS configuration for any L4 RSS configuration */ + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) + hash_flds |= ICE_FLOW_AVF_RSS_IPV4_MASKS; + + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) + hash_flds |= ICE_FLOW_AVF_RSS_IPV6_MASKS; + + /* Create the corresponding RSS configuration for each valid hash bit */ + while (hash_flds) { + u64 rss_hash = ICE_HASH_INVALID; + + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) { + if (hash_flds & ICE_FLOW_AVF_RSS_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4; + hash_flds &= ~ICE_FLOW_AVF_RSS_IPV4_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_TCP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_UDP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS; + } else if (hash_flds & + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_SCTP_PORT; + hash_flds &= + ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP); + } + } else if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) { + if (hash_flds & ICE_FLOW_AVF_RSS_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6; + hash_flds &= ~ICE_FLOW_AVF_RSS_IPV6_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_TCP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_UDP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS; + } else if (hash_flds & + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_SCTP_PORT; + hash_flds &= + ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP); + } + } + + if (rss_hash == ICE_HASH_INVALID) + return ICE_ERR_OUT_OF_RANGE; + + status = ice_add_rss_cfg(hw, vsi_handle, rss_hash, + ICE_FLOW_SEG_HDR_NONE); + if (status) + break; + } + + return status; +} + +/** + * ice_replay_rss_cfg - replay RSS configurations associated with VSI + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + */ +enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle) +{ + enum ice_status status = 0; + struct ice_rss_cfg *r; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + mutex_lock(&hw->rss_locks); + list_for_each_entry(r, &hw->rss_list_head, l_entry) { + if (test_bit(vsi_handle, r->vsis)) { + status = ice_add_rss_cfg_sync(hw, vsi_handle, + r->hashed_flds, + r->packet_hdr, + ICE_RSS_OUTER_HEADERS); + if (status) + break; + } + } + mutex_unlock(&hw->rss_locks); + + return status; +} + +/** + * ice_get_rss_cfg - returns hashed fields for the given header types + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hdrs: protocol header type + * + * This function will return the match fields of the first instance of flow + * profile having the given header types and containing input VSI + */ +u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs) +{ + struct ice_rss_cfg *r, *rss_cfg = NULL; + + /* verify if the protocol header is non zero and VSI is valid */ + if (hdrs == ICE_FLOW_SEG_HDR_NONE || !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_HASH_INVALID; + + mutex_lock(&hw->rss_locks); + list_for_each_entry(r, &hw->rss_list_head, l_entry) + if (test_bit(vsi_handle, r->vsis) && + r->packet_hdr == hdrs) { + rss_cfg = r; + break; + } + mutex_unlock(&hw->rss_locks); + + return rss_cfg ? rss_cfg->hashed_flds : ICE_HASH_INVALID; +} diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h new file mode 100644 index 000000000000..5558627bd5eb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_FLOW_H_ +#define _ICE_FLOW_H_ + +#define ICE_FLOW_ENTRY_HANDLE_INVAL 0 +#define ICE_FLOW_FLD_OFF_INVAL 0xffff + +/* Generate flow hash field from flow field type(s) */ +#define ICE_FLOW_HASH_IPV4 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)) +#define ICE_FLOW_HASH_IPV6 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)) +#define ICE_FLOW_HASH_TCP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)) +#define ICE_FLOW_HASH_UDP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)) +#define ICE_FLOW_HASH_SCTP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)) + +#define ICE_HASH_INVALID 0 +#define ICE_HASH_TCP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_TCP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_UDP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT) + +/* Protocol header fields within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization such as GRE, + * VxLAN, etc. + */ +enum ice_flow_seg_hdr { + ICE_FLOW_SEG_HDR_NONE = 0x00000000, + ICE_FLOW_SEG_HDR_IPV4 = 0x00000004, + ICE_FLOW_SEG_HDR_IPV6 = 0x00000008, + ICE_FLOW_SEG_HDR_TCP = 0x00000040, + ICE_FLOW_SEG_HDR_UDP = 0x00000080, + ICE_FLOW_SEG_HDR_SCTP = 0x00000100, +}; + +enum ice_flow_field { + /* L3 */ + ICE_FLOW_FIELD_IDX_IPV4_SA, + ICE_FLOW_FIELD_IDX_IPV4_DA, + ICE_FLOW_FIELD_IDX_IPV6_SA, + ICE_FLOW_FIELD_IDX_IPV6_DA, + /* L4 */ + ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, + ICE_FLOW_FIELD_IDX_TCP_DST_PORT, + ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, + ICE_FLOW_FIELD_IDX_UDP_DST_PORT, + ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, + ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, + /* The total number of enums must not exceed 64 */ + ICE_FLOW_FIELD_IDX_MAX +}; + +/* Flow headers and fields for AVF support */ +enum ice_flow_avf_hdr_field { + /* Values 0 - 28 are reserved for future use */ + ICE_AVF_FLOW_FIELD_INVALID = 0, + ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP = 29, + ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP, + ICE_AVF_FLOW_FIELD_IPV4_UDP, + ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK, + ICE_AVF_FLOW_FIELD_IPV4_TCP, + ICE_AVF_FLOW_FIELD_IPV4_SCTP, + ICE_AVF_FLOW_FIELD_IPV4_OTHER, + ICE_AVF_FLOW_FIELD_FRAG_IPV4, + /* Values 37-38 are reserved */ + ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP = 39, + ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP, + ICE_AVF_FLOW_FIELD_IPV6_UDP, + ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK, + ICE_AVF_FLOW_FIELD_IPV6_TCP, + ICE_AVF_FLOW_FIELD_IPV6_SCTP, + ICE_AVF_FLOW_FIELD_IPV6_OTHER, + ICE_AVF_FLOW_FIELD_FRAG_IPV6, + ICE_AVF_FLOW_FIELD_RSVD47, + ICE_AVF_FLOW_FIELD_FCOE_OX, + ICE_AVF_FLOW_FIELD_FCOE_RX, + ICE_AVF_FLOW_FIELD_FCOE_OTHER, + /* Values 51-62 are reserved */ + ICE_AVF_FLOW_FIELD_L2_PAYLOAD = 63, + ICE_AVF_FLOW_FIELD_MAX +}; + +/* Supported RSS offloads This macro is defined to support + * VIRTCHNL_OP_GET_RSS_HENA_CAPS ops. PF driver sends the RSS hardware + * capabilities to the caller of this ops. + */ +#define ICE_DEFAULT_RSS_HENA ( \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP)) + +enum ice_flow_dir { + ICE_FLOW_RX = 0x02, +}; + +enum ice_flow_priority { + ICE_FLOW_PRIO_LOW, + ICE_FLOW_PRIO_NORMAL, + ICE_FLOW_PRIO_HIGH +}; + +#define ICE_FLOW_SEG_MAX 2 +#define ICE_FLOW_FV_EXTRACT_SZ 2 + +#define ICE_FLOW_SET_HDRS(seg, val) ((seg)->hdrs |= (u32)(val)) + +struct ice_flow_seg_xtrct { + u8 prot_id; /* Protocol ID of extracted header field */ + u16 off; /* Starting offset of the field in header in bytes */ + u8 idx; /* Index of FV entry used */ + u8 disp; /* Displacement of field in bits fr. FV entry's start */ +}; + +enum ice_flow_fld_match_type { + ICE_FLOW_FLD_TYPE_REG, /* Value, mask */ + ICE_FLOW_FLD_TYPE_RANGE, /* Value, mask, last (upper bound) */ + ICE_FLOW_FLD_TYPE_PREFIX, /* IP address, prefix, size of prefix */ + ICE_FLOW_FLD_TYPE_SIZE, /* Value, mask, size of match */ +}; + +struct ice_flow_fld_loc { + /* Describe offsets of field information relative to the beginning of + * input buffer provided when adding flow entries. + */ + u16 val; /* Offset where the value is located */ + u16 mask; /* Offset where the mask/prefix value is located */ + u16 last; /* Length or offset where the upper value is located */ +}; + +struct ice_flow_fld_info { + enum ice_flow_fld_match_type type; + /* Location where to retrieve data from an input buffer */ + struct ice_flow_fld_loc src; + /* Location where to put the data into the final entry buffer */ + struct ice_flow_fld_loc entry; + struct ice_flow_seg_xtrct xtrct; +}; + +struct ice_flow_seg_info { + u32 hdrs; /* Bitmask indicating protocol headers present */ + u64 match; /* Bitmask indicating header fields to be matched */ + u64 range; /* Bitmask indicating header fields matched as ranges */ + + struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX]; +}; + +struct ice_flow_prof { + struct list_head l_entry; + + u64 id; + enum ice_flow_dir dir; + u8 segs_cnt; + + /* Keep track of flow entries associated with this flow profile */ + struct mutex entries_lock; + struct list_head entries; + + struct ice_flow_seg_info segs[ICE_FLOW_SEG_MAX]; + + /* software VSI handles referenced by this flow profile */ + DECLARE_BITMAP(vsis, ICE_MAX_VSI); +}; + +struct ice_rss_cfg { + struct list_head l_entry; + /* bitmap of VSIs added to the RSS entry */ + DECLARE_BITMAP(vsis, ICE_MAX_VSI); + u64 hashed_flds; + u32 packet_hdr; +}; + +enum ice_status ice_flow_rem_entry(struct ice_hw *hw, u64 entry_h); +void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle); +enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds); +enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs); +u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs); +#endif /* _ICE_FLOW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 6c5ce05742b1..6db3d0494127 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -52,18 +52,14 @@ #define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0) #define PF_MBX_ATQLEN_ATQENABLE_M BIT(31) #define PF_MBX_ATQT 0x0022E300 +#define PRTDCB_GENC 0x00083000 +#define PRTDCB_GENC_PFCLDA_S 16 +#define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16) #define PRTDCB_GENS 0x00083020 #define PRTDCB_GENS_DCBX_STATUS_S 0 #define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0) -#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, 8) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, 16) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, 24) +#define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) +#define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0) @@ -127,8 +123,11 @@ #define GLINT_DYN_CTL_CLEARPBA_M BIT(1) #define GLINT_DYN_CTL_SWINT_TRIG_M BIT(2) #define GLINT_DYN_CTL_ITR_INDX_S 3 +#define GLINT_DYN_CTL_ITR_INDX_M ICE_M(0x3, 3) #define GLINT_DYN_CTL_INTERVAL_S 5 +#define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) +#define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) #define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4)) #define GLINT_RATE(_INT) (0x0015A000 + ((_INT) * 4)) @@ -268,8 +267,14 @@ #define GLNVM_GENS_SR_SIZE_S 5 #define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, 5) #define GLNVM_ULD 0x000B6008 +#define GLNVM_ULD_PCIER_DONE_M BIT(0) +#define GLNVM_ULD_PCIER_DONE_1_M BIT(1) #define GLNVM_ULD_CORER_DONE_M BIT(3) #define GLNVM_ULD_GLOBR_DONE_M BIT(4) +#define GLNVM_ULD_POR_DONE_M BIT(5) +#define GLNVM_ULD_POR_DONE_1_M BIT(8) +#define GLNVM_ULD_PCIER_DONE_2_M BIT(9) +#define GLNVM_ULD_PE_DONE_M BIT(10) #define GLPCI_CNF2 0x000BE004 #define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1) #define PF_FUNC_RID 0x0009E880 @@ -281,14 +286,10 @@ #define GL_PWR_MODE_CTL 0x000B820C #define GL_PWR_MODE_CTL_CAR_MAX_BW_S 30 #define GL_PWR_MODE_CTL_CAR_MAX_BW_M ICE_M(0x3, 30) -#define GLPRT_BPRCH(_i) (0x00381384 + ((_i) * 8)) #define GLPRT_BPRCL(_i) (0x00381380 + ((_i) * 8)) -#define GLPRT_BPTCH(_i) (0x00381244 + ((_i) * 8)) #define GLPRT_BPTCL(_i) (0x00381240 + ((_i) * 8)) #define GLPRT_CRCERRS(_i) (0x00380100 + ((_i) * 8)) -#define GLPRT_GORCH(_i) (0x00380004 + ((_i) * 8)) #define GLPRT_GORCL(_i) (0x00380000 + ((_i) * 8)) -#define GLPRT_GOTCH(_i) (0x00380B44 + ((_i) * 8)) #define GLPRT_GOTCL(_i) (0x00380B40 + ((_i) * 8)) #define GLPRT_ILLERRC(_i) (0x003801C0 + ((_i) * 8)) #define GLPRT_LXOFFRXC(_i) (0x003802C0 + ((_i) * 8)) @@ -296,38 +297,22 @@ #define GLPRT_LXONRXC(_i) (0x00380280 + ((_i) * 8)) #define GLPRT_LXONTXC(_i) (0x00381140 + ((_i) * 8)) #define GLPRT_MLFC(_i) (0x00380040 + ((_i) * 8)) -#define GLPRT_MPRCH(_i) (0x00381344 + ((_i) * 8)) #define GLPRT_MPRCL(_i) (0x00381340 + ((_i) * 8)) -#define GLPRT_MPTCH(_i) (0x00381204 + ((_i) * 8)) #define GLPRT_MPTCL(_i) (0x00381200 + ((_i) * 8)) #define GLPRT_MRFC(_i) (0x00380080 + ((_i) * 8)) -#define GLPRT_PRC1023H(_i) (0x00380A04 + ((_i) * 8)) #define GLPRT_PRC1023L(_i) (0x00380A00 + ((_i) * 8)) -#define GLPRT_PRC127H(_i) (0x00380944 + ((_i) * 8)) #define GLPRT_PRC127L(_i) (0x00380940 + ((_i) * 8)) -#define GLPRT_PRC1522H(_i) (0x00380A44 + ((_i) * 8)) #define GLPRT_PRC1522L(_i) (0x00380A40 + ((_i) * 8)) -#define GLPRT_PRC255H(_i) (0x00380984 + ((_i) * 8)) #define GLPRT_PRC255L(_i) (0x00380980 + ((_i) * 8)) -#define GLPRT_PRC511H(_i) (0x003809C4 + ((_i) * 8)) #define GLPRT_PRC511L(_i) (0x003809C0 + ((_i) * 8)) -#define GLPRT_PRC64H(_i) (0x00380904 + ((_i) * 8)) #define GLPRT_PRC64L(_i) (0x00380900 + ((_i) * 8)) -#define GLPRT_PRC9522H(_i) (0x00380A84 + ((_i) * 8)) #define GLPRT_PRC9522L(_i) (0x00380A80 + ((_i) * 8)) -#define GLPRT_PTC1023H(_i) (0x00380C84 + ((_i) * 8)) #define GLPRT_PTC1023L(_i) (0x00380C80 + ((_i) * 8)) -#define GLPRT_PTC127H(_i) (0x00380BC4 + ((_i) * 8)) #define GLPRT_PTC127L(_i) (0x00380BC0 + ((_i) * 8)) -#define GLPRT_PTC1522H(_i) (0x00380CC4 + ((_i) * 8)) #define GLPRT_PTC1522L(_i) (0x00380CC0 + ((_i) * 8)) -#define GLPRT_PTC255H(_i) (0x00380C04 + ((_i) * 8)) #define GLPRT_PTC255L(_i) (0x00380C00 + ((_i) * 8)) -#define GLPRT_PTC511H(_i) (0x00380C44 + ((_i) * 8)) #define GLPRT_PTC511L(_i) (0x00380C40 + ((_i) * 8)) -#define GLPRT_PTC64H(_i) (0x00380B84 + ((_i) * 8)) #define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8)) -#define GLPRT_PTC9522H(_i) (0x00380D04 + ((_i) * 8)) #define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8)) #define GLPRT_PXOFFRXC(_i, _j) (0x00380500 + ((_i) * 8 + (_j) * 64)) #define GLPRT_PXOFFTXC(_i, _j) (0x00380F40 + ((_i) * 8 + (_j) * 64)) @@ -340,32 +325,22 @@ #define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8)) #define GLPRT_RXON2OFFCNT(_i, _j) (0x00380700 + ((_i) * 8 + (_j) * 64)) #define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8)) -#define GLPRT_UPRCH(_i) (0x00381304 + ((_i) * 8)) #define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8)) -#define GLPRT_UPTCH(_i) (0x003811C4 + ((_i) * 8)) #define GLPRT_UPTCL(_i) (0x003811C0 + ((_i) * 8)) -#define GLV_BPRCH(_i) (0x003B6004 + ((_i) * 8)) #define GLV_BPRCL(_i) (0x003B6000 + ((_i) * 8)) -#define GLV_BPTCH(_i) (0x0030E004 + ((_i) * 8)) #define GLV_BPTCL(_i) (0x0030E000 + ((_i) * 8)) -#define GLV_GORCH(_i) (0x003B0004 + ((_i) * 8)) #define GLV_GORCL(_i) (0x003B0000 + ((_i) * 8)) -#define GLV_GOTCH(_i) (0x00300004 + ((_i) * 8)) #define GLV_GOTCL(_i) (0x00300000 + ((_i) * 8)) -#define GLV_MPRCH(_i) (0x003B4004 + ((_i) * 8)) #define GLV_MPRCL(_i) (0x003B4000 + ((_i) * 8)) -#define GLV_MPTCH(_i) (0x0030C004 + ((_i) * 8)) #define GLV_MPTCL(_i) (0x0030C000 + ((_i) * 8)) #define GLV_RDPC(_i) (0x00294C04 + ((_i) * 4)) #define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4)) -#define GLV_UPRCH(_i) (0x003B2004 + ((_i) * 8)) #define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) -#define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8)) #define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) -#define PF_VT_PFALLOC_HIF 0x0009DD80 #define VSIQF_HKEY_MAX_INDEX 12 #define VSIQF_HLUT_MAX_INDEX 15 #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) #define VFINT_DYN_CTLN_CLEARPBA_M BIT(1) +#define PRTRPB_RDPC 0x000AC260 #endif /* _ICE_HW_AUTOGEN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 510a8c900e61..878e125d8b42 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -199,6 +199,14 @@ enum ice_rxdid { /* Receive Flex Descriptor Rx opcode values */ #define ICE_RX_OPC_MDID 0x01 +/* Receive Descriptor MDID values that access packet flags */ +enum ice_flex_mdid_pkt_flags { + ICE_RX_MDID_PKT_FLAGS_15_0 = 20, + ICE_RX_MDID_PKT_FLAGS_31_16, + ICE_RX_MDID_PKT_FLAGS_47_32, + ICE_RX_MDID_PKT_FLAGS_63_48, +}; + /* Receive Descriptor MDID values */ enum ice_flex_rx_mdid { ICE_RX_MDID_FLOW_ID_LOWER = 5, @@ -211,7 +219,7 @@ enum ice_flex_rx_mdid { /* Rx/Tx Flag64 packet flag bits */ enum ice_flg64_bits { ICE_FLG_PKT_DSI = 0, - ICE_FLG_EVLAN_x8100 = 15, + ICE_FLG_EVLAN_x8100 = 14, ICE_FLG_EVLAN_x9100, ICE_FLG_VLAN_x8100, ICE_FLG_TNL_MAC = 22, @@ -290,6 +298,7 @@ struct ice_rlan_ctx { u8 tphdata_ena; u8 tphhead_ena; u16 lrxqthresh; /* bigger than needed, see above for reason */ + u8 prefena; /* NOTE: normally must be set to 1 at init */ }; struct ice_ctx_ele { @@ -301,7 +310,7 @@ struct ice_ctx_ele { #define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ .offset = offsetof(struct _struct, _ele), \ - .size_of = FIELD_SIZEOF(struct _struct, _ele), \ + .size_of = sizeof_field(struct _struct, _ele), \ .width = _width, \ .lsb = _lsb, \ } @@ -427,6 +436,7 @@ struct ice_tlan_ctx { #define ICE_TLAN_CTX_VMVF_TYPE_PF 2 u16 src_vsi; u8 tsyn_ena; + u8 internal_usage_flag; u8 alt_vlan; u16 cpuid; /* bigger than needed, see above for reason */ u8 wb_mode; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a19f5920733b..d974e2fa3e63 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2,192 +2,27 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice.h" +#include "ice_base.h" +#include "ice_flow.h" #include "ice_lib.h" #include "ice_dcb_lib.h" /** - * ice_setup_rx_ctx - Configure a receive ring context - * @ring: The Rx ring to configure - * - * Configure the Rx descriptor ring in RLAN context. + * ice_vsi_type_str - maps VSI type enum to string equivalents + * @type: VSI type enum */ -static int ice_setup_rx_ctx(struct ice_ring *ring) +const char *ice_vsi_type_str(enum ice_vsi_type type) { - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - u32 rxdid = ICE_RXDID_FLEX_NIC; - struct ice_rlan_ctx rlan_ctx; - u32 regval; - u16 pf_q; - int err; - - /* what is Rx queue number in global space of 2K Rx queues */ - pf_q = vsi->rxq_map[ring->q_index]; - - /* clear the context structure first */ - memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - - rlan_ctx.base = ring->dma >> 7; - - rlan_ctx.qlen = ring->count; - - /* Receive Packet Data Buffer Size. - * The Packet Data Buffer Size is defined in 128 byte units. - */ - rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; - - /* use 32 byte descriptors */ - rlan_ctx.dsize = 1; - - /* Strip the Ethernet CRC bytes before the packet is posted to host - * memory. - */ - rlan_ctx.crcstrip = 1; - - /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ - rlan_ctx.l2tsel = 1; - - rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; - rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; - rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; - - /* This controls whether VLAN is stripped from inner headers - * The VLAN in the inner L2 header is stripped to the receive - * descriptor if enabled by this flag. - */ - rlan_ctx.showiv = 0; - - /* Max packet size for this queue - must not be set to a larger value - * than 5 x DBUF - */ - rlan_ctx.rxmax = min_t(u16, vsi->max_frame, - ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len); - - /* Rx queue threshold in units of 64 */ - rlan_ctx.lrxqthresh = 1; - - /* Enable Flexible Descriptors in the queue context which - * allows this driver to select a specific receive descriptor format - */ - if (vsi->type != ICE_VSI_VF) { - regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); - regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & - QRXFLXP_CNTXT_RXDID_IDX_M; - - /* increasing context priority to pick up profile ID; - * default is 0x01; setting to 0x03 to ensure profile - * is programming if prev context is of same priority - */ - regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & - QRXFLXP_CNTXT_RXDID_PRIO_M; - - wr32(hw, QRXFLXP_CNTXT(pf_q), regval); - } - - /* Absolute queue number out of 2K needs to be passed */ - err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); - if (err) { - dev_err(&vsi->back->pdev->dev, - "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", - pf_q, err); - return -EIO; - } - - if (vsi->type == ICE_VSI_VF) - return 0; - - /* init queue specific tail register */ - ring->tail = hw->hw_addr + QRX_TAIL(pf_q); - writel(0, ring->tail); - ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); - - return 0; -} - -/** - * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance - * @ring: The Tx ring to configure - * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized - * @pf_q: queue index in the PF space - * - * Configure the Tx descriptor ring in TLAN context. - */ -static void -ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) -{ - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - - tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; - - tlan_ctx->port_num = vsi->port_info->lport; - - /* Transmit Queue Length */ - tlan_ctx->qlen = ring->count; - - ice_set_cgd_num(tlan_ctx, ring); - - /* PF number */ - tlan_ctx->pf_num = hw->pf_id; - - /* queue belongs to a specific VSI type - * VF / VM index should be programmed per vmvf_type setting: - * for vmvf_type = VF, it is VF number between 0-256 - * for vmvf_type = VM, it is VM number between 0-767 - * for PF or EMP this field should be set to zero - */ - switch (vsi->type) { - case ICE_VSI_LB: - /* fall through */ + switch (type) { case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; - break; + return "ICE_VSI_PF"; case ICE_VSI_VF: - /* Firmware expects vmvf_num to be absolute VF ID */ - tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; - break; + return "ICE_VSI_VF"; + case ICE_VSI_LB: + return "ICE_VSI_LB"; default: - return; - } - - /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); - - tlan_ctx->tso_ena = ICE_TX_LEGACY; - tlan_ctx->tso_qnum = pf_q; - - /* Legacy or Advanced Host Interface: - * 0: Advanced Host Interface - * 1: Legacy Host Interface - */ - tlan_ctx->legacy_int = ICE_TX_LEGACY; -} - -/** - * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled - * @pf: the PF being configured - * @pf_q: the PF queue - * @ena: enable or disable state of the queue - * - * This routine will wait for the given Rx queue of the PF to reach the - * enabled or disabled state. - * Returns -ETIMEDOUT in case of failing to reach the requested state after - * multiple retries; else will return 0 in case of success. - */ -static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) -{ - int i; - - for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) { - if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) & - QRX_CTRL_QENA_STAT_M)) - return 0; - - usleep_range(20, 40); + return "unknown"; } - - return -ETIMEDOUT; } /** @@ -197,35 +32,12 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) */ static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; int i, ret = 0; for (i = 0; i < vsi->num_rxq; i++) { - int pf_q = vsi->rxq_map[i]; - u32 rx_reg; - - rx_reg = rd32(hw, QRX_CTRL(pf_q)); - - /* Skip if the queue is already in the requested state */ - if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) - continue; - - /* turn on/off the queue */ - if (ena) - rx_reg |= QRX_CTRL_QENA_REQ_M; - else - rx_reg &= ~QRX_CTRL_QENA_REQ_M; - wr32(hw, QRX_CTRL(pf_q), rx_reg); - - /* wait for the change to finish */ - ret = ice_pf_rxq_wait(pf, pf_q, ena); - if (ret) { - dev_err(&pf->pdev->dev, - "VSI idx %d Rx ring %d %sable timeout\n", - vsi->idx, pf_q, (ena ? "en" : "dis")); + ret = ice_vsi_ctrl_rx_ring(vsi, ena, i); + if (ret) break; - } } return ret; @@ -241,24 +53,39 @@ static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); /* allocate memory for both Tx and Rx ring pointers */ - vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq, sizeof(*vsi->tx_rings), GFP_KERNEL); if (!vsi->tx_rings) - goto err_txrings; + return -ENOMEM; - vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq, sizeof(*vsi->rx_rings), GFP_KERNEL); if (!vsi->rx_rings) - goto err_rxrings; + goto err_rings; + + /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */ + vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq), + sizeof(*vsi->txq_map), GFP_KERNEL); + + if (!vsi->txq_map) + goto err_txq_map; + + vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq, + sizeof(*vsi->rxq_map), GFP_KERNEL); + if (!vsi->rxq_map) + goto err_rxq_map; /* There is no need to allocate q_vectors for a loopback VSI. */ if (vsi->type == ICE_VSI_LB) return 0; /* allocate memory for q_vector pointers */ - vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors, + vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); if (!vsi->q_vectors) goto err_vectors; @@ -266,10 +93,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) return 0; err_vectors: - devm_kfree(&pf->pdev->dev, vsi->rx_rings); -err_rxrings: - devm_kfree(&pf->pdev->dev, vsi->tx_rings); -err_txrings: + devm_kfree(dev, vsi->rxq_map); +err_rxq_map: + devm_kfree(dev, vsi->txq_map); +err_txq_map: + devm_kfree(dev, vsi->rx_rings); +err_rings: + devm_kfree(dev, vsi->tx_rings); return -ENOMEM; } @@ -287,8 +117,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; break; default: - dev_dbg(&vsi->back->pdev->dev, - "Not setting number of Tx/Rx descriptors for VSI type %d\n", + dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n", vsi->type); break; } @@ -311,9 +140,30 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = pf->num_lan_tx; - vsi->alloc_rxq = pf->num_lan_rx; - vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx); + vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf), + num_online_cpus()); + if (vsi->req_txq) { + vsi->alloc_txq = vsi->req_txq; + vsi->num_txq = vsi->req_txq; + } + + pf->num_lan_tx = vsi->alloc_txq; + + /* only 1 Rx queue unless RSS is enabled */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->alloc_rxq = 1; + } else { + vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf), + num_online_cpus()); + if (vsi->req_rxq) { + vsi->alloc_rxq = vsi->req_rxq; + vsi->num_rxq = vsi->req_rxq; + } + } + + pf->num_lan_rx = vsi->alloc_rxq; + + vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq); break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; @@ -331,7 +181,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); break; } @@ -377,7 +227,7 @@ void ice_vsi_delete(struct ice_vsi *vsi) struct ice_vsi_ctx *ctxt; enum ice_status status; - ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return; @@ -389,10 +239,10 @@ void ice_vsi_delete(struct ice_vsi *vsi) status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL); if (status) - dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n", - vsi->vsi_num); + dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n", + vsi->vsi_num, status); - devm_kfree(&pf->pdev->dev, ctxt); + kfree(ctxt); } /** @@ -402,20 +252,31 @@ void ice_vsi_delete(struct ice_vsi *vsi) static void ice_vsi_free_arrays(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); /* free the ring and vector containers */ if (vsi->q_vectors) { - devm_kfree(&pf->pdev->dev, vsi->q_vectors); + devm_kfree(dev, vsi->q_vectors); vsi->q_vectors = NULL; } if (vsi->tx_rings) { - devm_kfree(&pf->pdev->dev, vsi->tx_rings); + devm_kfree(dev, vsi->tx_rings); vsi->tx_rings = NULL; } if (vsi->rx_rings) { - devm_kfree(&pf->pdev->dev, vsi->rx_rings); + devm_kfree(dev, vsi->rx_rings); vsi->rx_rings = NULL; } + if (vsi->txq_map) { + devm_kfree(dev, vsi->txq_map); + vsi->txq_map = NULL; + } + if (vsi->rxq_map) { + devm_kfree(dev, vsi->rxq_map); + vsi->rxq_map = NULL; + } } /** @@ -430,6 +291,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; + struct device *dev; if (!vsi) return 0; @@ -438,10 +300,10 @@ int ice_vsi_clear(struct ice_vsi *vsi) return -EINVAL; pf = vsi->back; + dev = ice_pf_to_dev(pf); if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) { - dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n", - vsi->idx); + dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx); return -EINVAL; } @@ -454,7 +316,7 @@ int ice_vsi_clear(struct ice_vsi *vsi) ice_vsi_free_arrays(vsi); mutex_unlock(&pf->sw_mutex); - devm_kfree(&pf->pdev->dev, vsi); + devm_kfree(dev, vsi); return 0; } @@ -487,6 +349,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) static struct ice_vsi * ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) { + struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; /* Need to protect the allocation of the VSIs at the PF level */ @@ -497,19 +360,19 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) * is available to be populated */ if (pf->next_vsi == ICE_NO_VSI) { - dev_dbg(&pf->pdev->dev, "out of VSI slots!\n"); + dev_dbg(dev, "out of VSI slots!\n"); goto unlock_pf; } - vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL); + vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL); if (!vsi) goto unlock_pf; vsi->type = type; vsi->back = pf; set_bit(__ICE_DOWN, vsi->state); + vsi->idx = pf->next_vsi; - vsi->work_lmt = ICE_DFLT_IRQ_WORK; if (type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); @@ -533,7 +396,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) goto err_rings; break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(dev, "Unknown VSI type %d\n", vsi->type); goto unlock_pf; } @@ -546,7 +409,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) goto unlock_pf; err_rings: - devm_kfree(&pf->pdev->dev, vsi); + devm_kfree(dev, vsi); vsi = NULL; unlock_pf: mutex_unlock(&pf->sw_mutex); @@ -554,88 +417,6 @@ unlock_pf: } /** - * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI - * @qs_cfg: gathered variables needed for PF->VSI queues assignment - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) -{ - int offset, i; - - mutex_lock(qs_cfg->qs_mutex); - offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, - 0, qs_cfg->q_count, 0); - if (offset >= qs_cfg->pf_map_size) { - mutex_unlock(qs_cfg->qs_mutex); - return -ENOMEM; - } - - bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); - for (i = 0; i < qs_cfg->q_count; i++) - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; - mutex_unlock(qs_cfg->qs_mutex); - - return 0; -} - -/** - * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI - * @qs_cfg: gathered variables needed for pf->vsi queues assignment - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) -{ - int i, index = 0; - - mutex_lock(qs_cfg->qs_mutex); - for (i = 0; i < qs_cfg->q_count; i++) { - index = find_next_zero_bit(qs_cfg->pf_map, - qs_cfg->pf_map_size, index); - if (index >= qs_cfg->pf_map_size) - goto err_scatter; - set_bit(index, qs_cfg->pf_map); - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; - } - mutex_unlock(qs_cfg->qs_mutex); - - return 0; -err_scatter: - for (index = 0; index < i; index++) { - clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map); - qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0; - } - mutex_unlock(qs_cfg->qs_mutex); - - return -ENOMEM; -} - -/** - * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI - * @qs_cfg: gathered variables needed for pf->vsi queues assignment - * - * This function first tries to find contiguous space. If it is not successful, - * it tries with the scatter approach. - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) -{ - int ret = 0; - - ret = __ice_vsi_get_qs_contig(qs_cfg); - if (ret) { - /* contig failed, so try with scatter approach */ - qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; - qs_cfg->q_count = min_t(u16, qs_cfg->q_count, - qs_cfg->scatter_count); - ret = __ice_vsi_get_qs_sc(qs_cfg); - } - return ret; -} - -/** * ice_vsi_get_qs - Assign queues from PF to VSI * @vsi: the VSI to assign queues to * @@ -647,7 +428,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) struct ice_qs_cfg tx_qs_cfg = { .qs_mutex = &pf->avail_q_mutex, .pf_map = pf->avail_txqs, - .pf_map_size = ICE_MAX_TXQS, + .pf_map_size = pf->max_pf_txqs, .q_count = vsi->alloc_txq, .scatter_count = ICE_MAX_SCATTER_TXQS, .vsi_map = vsi->txq_map, @@ -657,7 +438,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) struct ice_qs_cfg rx_qs_cfg = { .qs_mutex = &pf->avail_q_mutex, .pf_map = pf->avail_rxqs, - .pf_map_size = ICE_MAX_RXQS, + .pf_map_size = pf->max_pf_rxqs, .q_count = vsi->alloc_rxq, .scatter_count = ICE_MAX_SCATTER_RXQS, .vsi_map = vsi->rxq_map, @@ -701,19 +482,57 @@ void ice_vsi_put_qs(struct ice_vsi *vsi) } /** - * ice_rss_clean - Delete RSS related VSI structures that hold user inputs + * ice_is_safe_mode + * @pf: pointer to the PF struct + * + * returns true if driver is in safe mode, false otherwise + */ +bool ice_is_safe_mode(struct ice_pf *pf) +{ + return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags); +} + +/** + * ice_vsi_clean_rss_flow_fld - Delete RSS configuration + * @vsi: the VSI being cleaned up + * + * This function deletes RSS input set for all flows that were configured + * for this VSI + */ +static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + + if (ice_is_safe_mode(pf)) + return; + + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status) + dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n", + vsi->vsi_num, status); +} + +/** + * ice_rss_clean - Delete RSS related VSI structures and configuration * @vsi: the VSI being removed */ static void ice_rss_clean(struct ice_vsi *vsi) { - struct ice_pf *pf; + struct ice_pf *pf = vsi->back; + struct device *dev; - pf = vsi->back; + dev = ice_pf_to_dev(pf); if (vsi->rss_hkey_user) - devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user); + devm_kfree(dev, vsi->rss_hkey_user); if (vsi->rss_lut_user) - devm_kfree(&pf->pdev->dev, vsi->rss_lut_user); + devm_kfree(dev, vsi->rss_lut_user); + + ice_vsi_clean_rss_flow_fld(vsi); + /* remove RSS replay list */ + if (!ice_is_safe_mode(pf)) + ice_rem_vsi_rss_list(&pf->hw, vsi->idx); } /** @@ -751,7 +570,7 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) case ICE_VSI_LB: break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); break; } @@ -855,7 +674,9 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) else max_rss = ICE_MAX_SMALL_RSS_QS; qcount_rx = min_t(int, rx_numq_tc, max_rss); - qcount_rx = min_t(int, qcount_rx, vsi->rss_size); + if (!vsi->req_rxq) + qcount_rx = min_t(int, qcount_rx, + vsi->rss_size); } } @@ -902,7 +723,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->num_txq = tx_count; if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { - dev_dbg(&vsi->back->pdev->dev, "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); + dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); /* since there is a chance that num_rxq could have been changed * in the above for loop, make num_txq equal to num_rxq. */ @@ -927,9 +748,11 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) { u8 lut_type, hash_type; + struct device *dev; struct ice_pf *pf; pf = vsi->back; + dev = ice_pf_to_dev(pf); switch (vsi->type) { case ICE_VSI_PF: @@ -943,10 +766,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; case ICE_VSI_LB: - dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type); + dev_dbg(dev, "Unsupported VSI type %s\n", + ice_vsi_type_str(vsi->type)); return; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(dev, "Unknown VSI type %d\n", vsi->type); return; } @@ -959,18 +783,21 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured + * @init_vsi: is this call creating a VSI * * This initializes a VSI context depending on the VSI type to be added and * passes it down to the add_vsi aq command to create a new VSI. */ -static int ice_vsi_init(struct ice_vsi *vsi) +static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; struct ice_vsi_ctx *ctxt; + struct device *dev; int ret = 0; - ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -987,7 +814,8 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id; break; default: - return -ENODEV; + ret = -ENODEV; + goto out; } ice_set_dflt_vsi_ctx(ctxt); @@ -996,25 +824,65 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; /* Set LUT type and HASH type if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_set_rss_vsi_ctx(ctxt, vsi); + /* if updating VSI context, make sure to set valid_section: + * to indicate which section of VSI context being updated + */ + if (!init_vsi) + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + } ctxt->info.sw_id = vsi->port_info->sw_id; ice_vsi_setup_q_map(vsi, ctxt); + if (!init_vsi) /* means VSI being updated */ + /* must to indicate which section of VSI context are + * being modified + */ + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + + /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off + * respectively + */ + if (vsi->type == ICE_VSI_VF) { + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + if (pf->vf[vsi->vf_id].spoofchk) { + ctxt->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctxt->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } + } - /* Enable MAC Antispoof with new VSI being initialized or updated */ - if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) { + /* Allow control frames out of main VSI */ + if (vsi->type == ICE_VSI_PF) { + ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; ctxt->info.valid_sections |= cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; } - ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); - if (ret) { - dev_err(&pf->pdev->dev, - "Add VSI failed, err %d\n", ret); - return -EIO; + if (init_vsi) { + ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); + if (ret) { + dev_err(dev, "Add VSI failed, err %d\n", ret); + ret = -EIO; + goto out; + } + } else { + ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (ret) { + dev_err(dev, "Update VSI failed, err %d\n", ret); + ret = -EIO; + goto out; + } } /* keep context for update VSI operations */ @@ -1023,136 +891,9 @@ static int ice_vsi_init(struct ice_vsi *vsi) /* record VSI number returned */ vsi->vsi_num = ctxt->vsi_num; - devm_kfree(&pf->pdev->dev, ctxt); - return ret; -} - -/** - * ice_free_q_vector - Free memory allocated for a specific interrupt vector - * @vsi: VSI having the memory freed - * @v_idx: index of the vector to be freed - */ -static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_q_vector *q_vector; - struct ice_pf *pf = vsi->back; - struct ice_ring *ring; - - if (!vsi->q_vectors[v_idx]) { - dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n", - v_idx); - return; - } - q_vector = vsi->q_vectors[v_idx]; - - ice_for_each_ring(ring, q_vector->tx) - ring->q_vector = NULL; - ice_for_each_ring(ring, q_vector->rx) - ring->q_vector = NULL; - - /* only VSI with an associated netdev is set up with NAPI */ - if (vsi->netdev) - netif_napi_del(&q_vector->napi); - - devm_kfree(&pf->pdev->dev, q_vector); - vsi->q_vectors[v_idx] = NULL; -} - -/** - * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors - * @vsi: the VSI having memory freed - */ -void ice_vsi_free_q_vectors(struct ice_vsi *vsi) -{ - int v_idx; - - ice_for_each_q_vector(vsi, v_idx) - ice_free_q_vector(vsi, v_idx); -} - -/** - * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector - * @vsi: the VSI being configured - * @v_idx: index of the vector in the VSI struct - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. - */ -static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_pf *pf = vsi->back; - struct ice_q_vector *q_vector; - - /* allocate q_vector */ - q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - return -ENOMEM; - - q_vector->vsi = vsi; - q_vector->v_idx = v_idx; - if (vsi->type == ICE_VSI_VF) - goto out; - /* only set affinity_mask if the CPU is online */ - if (cpu_online(v_idx)) - cpumask_set_cpu(v_idx, &q_vector->affinity_mask); - - /* This will not be called in the driver load path because the netdev - * will not be created yet. All other cases with register the NAPI - * handler here (i.e. resume, reset/rebuild, etc.) - */ - if (vsi->netdev) - netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, - NAPI_POLL_WEIGHT); - out: - /* tie q_vector and VSI together */ - vsi->q_vectors[v_idx] = q_vector; - - return 0; -} - -/** - * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors - * @vsi: the VSI being configured - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - */ -static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int v_idx = 0, num_q_vectors; - int err; - - if (vsi->q_vectors[0]) { - dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n", - vsi->vsi_num); - return -EEXIST; - } - - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - num_q_vectors = vsi->num_q_vectors; - } else { - err = -EINVAL; - goto err_out; - } - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = ice_vsi_alloc_q_vector(vsi, v_idx); - if (err) - goto err_out; - } - - return 0; - -err_out: - while (v_idx--) - ice_free_q_vector(vsi, v_idx); - - dev_err(&pf->pdev->dev, - "Failed to allocate %d q_vector for VSI %d, ret=%d\n", - vsi->num_q_vectors, vsi->vsi_num, err); - vsi->num_q_vectors = 0; - return err; + kfree(ctxt); + return ret; } /** @@ -1168,28 +909,26 @@ err_out: static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; u16 num_q_vectors; + dev = ice_pf_to_dev(pf); /* SRIOV doesn't grab irq_tracker entries for each VSI */ if (vsi->type == ICE_VSI_VF) return 0; if (vsi->base_vector) { - dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + dev_dbg(dev, "VSI %d has non-zero base vector %d\n", vsi->vsi_num, vsi->base_vector); return -EEXIST; } - if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - return -ENOENT; - num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); if (vsi->base_vector < 0) { - dev_err(&pf->pdev->dev, - "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + dev_err(dev, "Failed to get tracking for %d vectors for VSI %d, err=%d\n", num_q_vectors, vsi->vsi_num, vsi->base_vector); return -ENOENT; } @@ -1231,8 +970,10 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) static int ice_vsi_alloc_rings(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; int i; + dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ for (i = 0; i < vsi->alloc_txq; i++) { struct ice_ring *ring; @@ -1247,7 +988,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->reg_idx = vsi->txq_map[i]; ring->ring_active = false; ring->vsi = vsi; - ring->dev = &pf->pdev->dev; + ring->dev = dev; ring->count = vsi->num_tx_desc; vsi->tx_rings[i] = ring; } @@ -1266,7 +1007,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; - ring->dev = &pf->pdev->dev; + ring->dev = dev; ring->count = vsi->num_rx_desc; vsi->rx_rings[i] = ring; } @@ -1279,66 +1020,6 @@ err_out: } /** - * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors - * @vsi: the VSI being configured - * - * This function maps descriptor rings to the queue-specific vectors allotted - * through the MSI-X enabling code. On a constrained vector budget, we map Tx - * and Rx rings to the vector as "efficiently" as possible. - */ -#ifdef CONFIG_DCB -void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) -#else -static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) -#endif /* CONFIG_DCB */ -{ - int q_vectors = vsi->num_q_vectors; - int tx_rings_rem, rx_rings_rem; - int v_id; - - /* initially assigning remaining rings count to VSIs num queue value */ - tx_rings_rem = vsi->num_txq; - rx_rings_rem = vsi->num_rxq; - - for (v_id = 0; v_id < q_vectors; v_id++) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; - int tx_rings_per_v, rx_rings_per_v, q_id, q_base; - - /* Tx rings mapping to vector */ - tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); - q_vector->num_ring_tx = tx_rings_per_v; - q_vector->tx.ring = NULL; - q_vector->tx.itr_idx = ICE_TX_ITR; - q_base = vsi->num_txq - tx_rings_rem; - - for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; - - tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; - } - tx_rings_rem -= tx_rings_per_v; - - /* Rx rings mapping to vector */ - rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); - q_vector->num_ring_rx = rx_rings_per_v; - q_vector->rx.ring = NULL; - q_vector->rx.itr_idx = ICE_RX_ITR; - q_base = vsi->num_rxq - rx_rings_rem; - - for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; - - rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; - } - rx_rings_rem -= rx_rings_per_v; - } -} - -/** * ice_vsi_manage_rss_lut - disable/enable RSS * @vsi: the VSI being changed * @ena: boolean value indicating if this is an enable or disable request @@ -1352,8 +1033,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) int err = 0; u8 *lut; - lut = devm_kzalloc(&vsi->back->pdev->dev, vsi->rss_table_size, - GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -1366,7 +1046,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) } err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); - devm_kfree(&vsi->back->pdev->dev, lut); + kfree(lut); return err; } @@ -1379,12 +1059,14 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; enum ice_status status; + struct device *dev; int err = 0; u8 *lut; + dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); - lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -1397,13 +1079,12 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) vsi->rss_table_size); if (status) { - dev_err(&pf->pdev->dev, - "set_rss_lut failed, error %d\n", status); + dev_err(dev, "set_rss_lut failed, error %d\n", status); err = -EIO; goto ice_vsi_cfg_rss_exit; } - key = devm_kzalloc(&pf->pdev->dev, sizeof(*key), GFP_KERNEL); + key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; @@ -1420,18 +1101,126 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); if (status) { - dev_err(&pf->pdev->dev, "set_rss_key failed, error %d\n", - status); + dev_err(dev, "set_rss_key failed, error %d\n", status); err = -EIO; } - devm_kfree(&pf->pdev->dev, key); + kfree(key); ice_vsi_cfg_rss_exit: - devm_kfree(&pf->pdev->dev, lut); + kfree(lut); return err; } /** + * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows + * @vsi: VSI to be configured + * + * This function will only be called during the VF VSI setup. Upon successful + * completion of package download, this function will configure default RSS + * input sets for VF VSI. + */ +static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi->vsi_num); + return; + } + + status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA); + if (status) + dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n", + vsi->vsi_num, status); +} + +/** + * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows + * @vsi: VSI to be configured + * + * This function will only be called after successful download package call + * during initialization of PF. Since the downloaded package will erase the + * RSS section, this function will configure RSS input sets for different + * flow types. The last profile added has the highest priority, therefore 2 + * tuple profiles (i.e. IPv4 src/dst) are added before 4 tuple profiles + * (i.e. IPv4 src/dst TCP src/dst port). + */ +static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) +{ + u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi_num); + return; + } + /* configure RSS for IPv4 with input set IP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, + ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for IPv6 with input set IPv6 src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, + ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4, + ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for udp4 with input set IP src/dst, UDP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4, + ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for sctp4 with input set IP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, + ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6, + ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6, + ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for sctp6 with input set IPv6 src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, + ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n", + vsi_num, status); +} + +/** * ice_add_mac_to_list - Add a MAC address filter entry to the list * @vsi: the VSI to be forwarded to * @add_list: pointer to the list which contains MAC filter entries @@ -1441,13 +1230,14 @@ ice_vsi_cfg_rss_exit: * * Returns 0 on success or ENOMEM on failure. */ -int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr) +int +ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, + const u8 *macaddr) { struct ice_fltr_list_entry *tmp; struct ice_pf *pf = vsi->back; - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC); + tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC); if (!tmp) return -ENOMEM; @@ -1477,40 +1267,32 @@ void ice_update_eth_stats(struct ice_vsi *vsi) prev_es = &vsi->eth_stats_prev; cur_es = &vsi->eth_stats; - ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_bytes, - &cur_es->rx_bytes); + ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_bytes, &cur_es->rx_bytes); - ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_unicast, - &cur_es->rx_unicast); + ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_unicast, &cur_es->rx_unicast); - ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_multicast, - &cur_es->rx_multicast); + ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_multicast, &cur_es->rx_multicast); - ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_broadcast, - &cur_es->rx_broadcast); + ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_broadcast, &cur_es->rx_broadcast); ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded, &prev_es->rx_discards, &cur_es->rx_discards); - ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_bytes, - &cur_es->tx_bytes); + ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_bytes, &cur_es->tx_bytes); - ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_unicast, - &cur_es->tx_unicast); + ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_unicast, &cur_es->tx_unicast); - ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_multicast, - &cur_es->tx_multicast); + ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_multicast, &cur_es->tx_multicast); - ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_broadcast, - &cur_es->tx_broadcast); + ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_broadcast, &cur_es->tx_broadcast); ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded, &prev_es->tx_errors, &cur_es->tx_errors); @@ -1547,9 +1329,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; int err = 0; - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1566,11 +1350,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) status = ice_add_vlan(&pf->hw, &tmp_add_list); if (status) { err = -ENODEV; - dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n", - vid, vsi->vsi_num); + dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid, + vsi->vsi_num); } - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return err; } @@ -1587,9 +1371,11 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; int err = 0; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return -ENOMEM; @@ -1605,21 +1391,44 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) status = ice_remove_vlan(&pf->hw, &tmp_add_list); if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(&pf->pdev->dev, - "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", + dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", vid, vsi->vsi_num, status); } else if (status) { - dev_err(&pf->pdev->dev, - "Error removing VLAN %d on vsi %i error: %d\n", + dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n", vid, vsi->vsi_num, status); err = -EIO; } - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return err; } /** + * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length + * @vsi: VSI + */ +void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) +{ + if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; + vsi->rx_buf_len = ICE_RXBUF_2048; +#if (PAGE_SIZE < 8192) + } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && + (vsi->netdev->mtu <= ETH_DATA_LEN)) { + vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; + vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; +#endif + } else { + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; +#if (PAGE_SIZE < 8192) + vsi->rx_buf_len = ICE_RXBUF_3072; +#else + vsi->rx_buf_len = ICE_RXBUF_2048; +#endif + } +} + +/** * ice_vsi_cfg_rxqs - Configure the VSI for Rx * @vsi: the VSI being configured * @@ -1633,13 +1442,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_VF) goto setup_rings; - if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN) - vsi->max_frame = vsi->netdev->mtu + - ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - else - vsi->max_frame = ICE_RXBUF_2048; - - vsi->rx_buf_len = ICE_RXBUF_2048; + ice_vsi_cfg_frame_size(vsi); setup_rings: /* set up individual rings */ for (i = 0; i < vsi->num_rxq; i++) { @@ -1647,8 +1450,7 @@ setup_rings: err = ice_setup_rx_ctx(vsi->rx_rings[i]); if (err) { - dev_err(&vsi->back->pdev->dev, - "ice_setup_rx_ctx failed for RxQ %d, err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", i, err); return err; } @@ -1661,75 +1463,31 @@ setup_rings: * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured * @rings: Tx ring array to be configured - * @offset: offset within vsi->txq_map * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. */ static int -ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset) +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) { struct ice_aqc_add_tx_qgrp *qg_buf; - struct ice_aqc_add_txqs_perq *txq; - struct ice_pf *pf = vsi->back; - u8 num_q_grps, q_idx = 0; - enum ice_status status; - u16 buf_len, i, pf_q; - int err = 0, tc; + u16 q_idx = 0; + int err = 0; - buf_len = sizeof(*qg_buf); - qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); + qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL); if (!qg_buf) return -ENOMEM; qg_buf->num_txqs = 1; - num_q_grps = 1; - - /* set up and configure the Tx queues for each enabled TC */ - ice_for_each_traffic_class(tc) { - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) - break; - - for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - struct ice_tlan_ctx tlan_ctx = { 0 }; - - pf_q = vsi->txq_map[q_idx + offset]; - ice_setup_tx_ctx(rings[q_idx], &tlan_ctx, pf_q); - /* copy context contents into the qg_buf */ - qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, - ice_tlan_ctx_info); - - /* init queue specific tail reg. It is referred as - * transmit comm scheduler queue doorbell. - */ - rings[q_idx]->tail = - pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, - i, num_q_grps, qg_buf, - buf_len, NULL); - if (status) { - dev_err(&pf->pdev->dev, - "Failed to set LAN Tx queue context, error: %d\n", - status); - err = -ENODEV; - goto err_cfg_txqs; - } - /* Add Tx Queue TEID into the VSI Tx ring from the - * response. This will complete configuring and - * enabling the queue. - */ - txq = &qg_buf->txqs[0]; - if (pf_q == le16_to_cpu(txq->txq_id)) - rings[q_idx]->txq_teid = - le32_to_cpu(txq->q_teid); - - q_idx++; - } + for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); + if (err) + goto err_cfg_txqs; } + err_cfg_txqs: - devm_kfree(&pf->pdev->dev, qg_buf); + kfree(qg_buf); return err; } @@ -1742,159 +1500,46 @@ err_cfg_txqs: */ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) { - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0); + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); } /** - * ice_intrl_usec_to_reg - convert interrupt rate limit to register value - * @intrl: interrupt rate limit in usecs - * @gran: interrupt rate limit granularity in usecs - * - * This function converts a decimal interrupt rate limit in usecs to the format - * expected by firmware. - */ -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) -{ - u32 val = intrl / gran; - - if (val) - return val | GLINT_RATE_INTRL_ENA_M; - return 0; -} - -/** - * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set - * @hw: board specific structure - */ -static void ice_cfg_itr_gran(struct ice_hw *hw) -{ - u32 regval = rd32(hw, GLINT_CTL); - - /* no need to update global register if ITR gran is already set */ - if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && - (((regval & GLINT_CTL_ITR_GRAN_200_M) >> - GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_100_M) >> - GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_50_M) >> - GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_25_M) >> - GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) - return; - - regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & - GLINT_CTL_ITR_GRAN_200_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & - GLINT_CTL_ITR_GRAN_100_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & - GLINT_CTL_ITR_GRAN_50_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & - GLINT_CTL_ITR_GRAN_25_M); - wr32(hw, GLINT_CTL, regval); -} - -/** - * ice_cfg_itr - configure the initial interrupt throttle values - * @hw: pointer to the HW structure - * @q_vector: interrupt vector that's being configured - * - * Configure interrupt throttling values for the ring containers that are - * associated with the interrupt vector passed in. - */ -static void -ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) -{ - ice_cfg_itr_gran(hw); - - if (q_vector->num_ring_rx) { - struct ice_ring_container *rc = &q_vector->rx; - - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_RX_ITR; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } - - if (q_vector->num_ring_tx) { - struct ice_ring_container *rc = &q_vector->tx; - - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_TX_ITR; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } -} - -/** - * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI * @vsi: the VSI being configured - * @txq: Tx queue being mapped to MSI-X vector - * @msix_idx: MSI-X vector index within the function - * @itr_idx: ITR index of the interrupt cause * - * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector - * within the function space. + * Return 0 on success and a negative value on error + * Configure the Tx queues dedicated for XDP in given VSI for operation. */ -#ifdef CONFIG_PCI_IOV -void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) -#else -static void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) -#endif /* CONFIG_PCI_IOV */ +int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - u32 val; + int ret; + int i; - itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); + if (ret) + return ret; - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + for (i = 0; i < vsi->num_xdp_txq; i++) + vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]); - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + return ret; } /** - * ice_cfg_rxq_interrupt - configure interrupt on Rx queue - * @vsi: the VSI being configured - * @rxq: Rx queue being mapped to MSI-X vector - * @msix_idx: MSI-X vector index within the function - * @itr_idx: ITR index of the interrupt cause + * ice_intrl_usec_to_reg - convert interrupt rate limit to register value + * @intrl: interrupt rate limit in usecs + * @gran: interrupt rate limit granularity in usecs * - * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector - * within the function space. + * This function converts a decimal interrupt rate limit in usecs to the format + * expected by firmware. */ -#ifdef CONFIG_PCI_IOV -void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) -#else -static void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) -#endif /* CONFIG_PCI_IOV */ +u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - u32 val; - - itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; - - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); - - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + u32 val = intrl / gran; - ice_flush(hw); + if (val) + return val | GLINT_RATE_INTRL_ENA_M; + return 0; } /** @@ -1951,13 +1596,12 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) */ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -1975,7 +1619,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1983,7 +1627,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) vsi->info.vlan_flags = ctxt->info.vlan_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -1994,13 +1638,12 @@ out: */ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -2022,7 +1665,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", ena, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -2030,7 +1673,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) vsi->info.vlan_flags = ctxt->info.vlan_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -2057,123 +1700,37 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) } /** - * ice_trigger_sw_intr - trigger a software interrupt - * @hw: pointer to the HW structure - * @q_vector: interrupt vector to trigger the software interrupt for - */ -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) -{ - wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), - (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_M); -} - -/** * ice_vsi_stop_tx_rings - Disable Tx rings * @vsi: the VSI being configured * @rst_src: reset source * @rel_vmvf_num: Relative ID of VF/VM * @rings: Tx ring array to be stopped - * @offset: offset within vsi->txq_map */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings, int offset) + u16 rel_vmvf_num, struct ice_ring **rings) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - int tc, q_idx = 0, err = 0; - u16 *q_ids, *q_handles, i; - enum ice_status status; - u32 *q_teids, val; + u16 q_idx; if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) return -EINVAL; - q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids), - GFP_KERNEL); - if (!q_teids) - return -ENOMEM; - - q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids), - GFP_KERNEL); - if (!q_ids) { - err = -ENOMEM; - goto err_alloc_q_ids; - } - - q_handles = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, - sizeof(*q_handles), GFP_KERNEL); - if (!q_handles) { - err = -ENOMEM; - goto err_alloc_q_handles; - } - - /* set up the Tx queue list to be disabled for each enabled TC */ - ice_for_each_traffic_class(tc) { - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) - break; - - for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - struct ice_q_vector *q_vector; - - if (!rings || !rings[q_idx]) { - err = -EINVAL; - goto err_out; - } - - q_ids[i] = vsi->txq_map[q_idx + offset]; - q_teids[i] = rings[q_idx]->txq_teid; - q_handles[i] = i; - - /* clear cause_ena bit for disabled queues */ - val = rd32(hw, QINT_TQCTL(rings[i]->reg_idx)); - val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(rings[i]->reg_idx), val); + for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + struct ice_txq_meta txq_meta = { }; + int status; - /* software is expected to wait for 100 ns */ - ndelay(100); + if (!rings || !rings[q_idx]) + return -EINVAL; - /* trigger a software interrupt for the vector - * associated to the queue to schedule NAPI handler - */ - q_vector = rings[i]->q_vector; - if (q_vector) - ice_trigger_sw_intr(hw, q_vector); + ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta); + status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num, + rings[q_idx], &txq_meta); - q_idx++; - } - status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc, - vsi->num_txq, q_handles, q_ids, - q_teids, rst_src, rel_vmvf_num, NULL); - - /* if the disable queue command was exercised during an active - * reset flow, ICE_ERR_RESET_ONGOING is returned. This is not - * an error as the reset operation disables queues at the - * hardware level anyway. - */ - if (status == ICE_ERR_RESET_ONGOING) { - dev_dbg(&pf->pdev->dev, - "Reset in progress. LAN Tx queues already disabled\n"); - } else if (status) { - dev_err(&pf->pdev->dev, - "Failed to disable LAN Tx queues, error: %d\n", - status); - err = -ENODEV; - } + if (status) + return status; } -err_out: - devm_kfree(&pf->pdev->dev, q_handles); - -err_alloc_q_handles: - devm_kfree(&pf->pdev->dev, q_ids); - -err_alloc_q_ids: - devm_kfree(&pf->pdev->dev, q_teids); - - return err; + return 0; } /** @@ -2186,8 +1743,16 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) { - return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, - 0); + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings); +} + +/** + * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings + * @vsi: the VSI being configured + */ +int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); } /** @@ -2201,7 +1766,6 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) { struct ice_vsi_ctx *ctxt; - struct device *dev; struct ice_pf *pf; int status; @@ -2209,29 +1773,20 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) return -EINVAL; pf = vsi->back; - dev = &pf->pdev->dev; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; ctxt->info = vsi->info; - if (ena) { - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S; + if (ena) ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } else { - ctxt->info.sec_flags &= - ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + else ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } if (!vlan_promisc) ctxt->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID | - ICE_AQ_VSI_PROP_SW_VALID); + cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); if (status) { @@ -2241,14 +1796,13 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) goto err_out; } - vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; - devm_kfree(dev, ctxt); + kfree(ctxt); return 0; err_out: - devm_kfree(dev, ctxt); + kfree(ctxt); return -EIO; } @@ -2276,8 +1830,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[i]; if (!q_vector) { - dev_err(&vsi->back->pdev->dev, - "Failed to set reg_idx on q_vector %d VSI %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n", i, vsi->vsi_num); goto clear_reg_idx; } @@ -2317,8 +1870,10 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return; @@ -2338,11 +1893,10 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, - "Failure Adding or Removing Ethertype on VSI %i error: %d\n", + dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n", vsi->vsi_num, status); - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); } /** @@ -2357,8 +1911,10 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return; @@ -2385,12 +1941,11 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, - "Fail %s %s LLDP rule on VSI %i error: %d\n", + dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n", create ? "adding" : "removing", tx ? "TX" : "RX", vsi->vsi_num, status); - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); } /** @@ -2412,7 +1967,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, enum ice_vsi_type type, u16 vf_id) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); enum ice_status status; struct ice_vsi *vsi; int ret, i; @@ -2448,7 +2003,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_vsi_set_tc_cfg(vsi); /* create the VSI */ - ret = ice_vsi_init(vsi); + ret = ice_vsi_init(vsi, true); if (ret) goto unroll_get_qs; @@ -2476,8 +2031,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * receive traffic on first queue. Hence no need to capture * return value */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } break; case ICE_VSI_VF: /* VF driver will take care of creating netdev for this type and @@ -2497,15 +2054,14 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; - pf->q_left_tx -= vsi->alloc_txq; - pf->q_left_rx -= vsi->alloc_rxq; - /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. Hence no need to capture * return value */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_vf_rss_flow_fld(vsi); + } break; case ICE_VSI_LB: ret = ice_vsi_alloc_rings(vsi); @@ -2519,13 +2075,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, /* configure VSI nodes based on number of queues and TC's */ for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = pf->num_lan_tx; + max_txqs[i] = vsi->alloc_txq; status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(&pf->pdev->dev, - "VSI %d failed lan queue config, error %d\n", + dev_err(dev, "VSI %d failed lan queue config, error %d\n", vsi->vsi_num, status); goto unroll_vector_base; } @@ -2535,21 +2090,17 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * out PAUSE or PFC frames. If enabled, FW can still send FC frames. * The rule is added once for PF VSI in order to create appropriate * recipe, since VSI/VSI list is ignored with drop action... - * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets - * need to be dropped so that VFs cannot send LLDP packets to reconfig - * DCB settings in the HW. Also, if the FW DCBX engine is not running - * then Rx LLDP packets need to be redirected up the stack. + * Also add rules to handle LLDP Tx packets. Tx LLDP packets need to + * be dropped so that VFs cannot send LLDP packets to reconfig DCB + * settings in the HW. */ - if (vsi->type == ICE_VSI_PF) { - ice_vsi_add_rem_eth_mac(vsi, true); + if (!ice_is_safe_mode(pf)) + if (vsi->type == ICE_VSI_PF) { + ice_vsi_add_rem_eth_mac(vsi, true); - /* Tx LLDP packets */ - ice_cfg_sw_lldp(vsi, true, true); - - /* Rx LLDP packets */ - if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) - ice_cfg_sw_lldp(vsi, false, true); - } + /* Tx LLDP packets */ + ice_cfg_sw_lldp(vsi, true, true); + } return vsi; @@ -2563,8 +2114,6 @@ unroll_vsi_init: ice_vsi_delete(vsi); unroll_get_qs: ice_vsi_put_qs(vsi); - pf->q_left_tx += vsi->alloc_txq; - pf->q_left_rx += vsi->alloc_rxq; ice_vsi_clear(vsi); return NULL; @@ -2590,6 +2139,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); for (q = 0; q < q_vector->num_ring_tx; q++) { wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); + if (ice_is_xdp_ena_vsi(vsi)) { + u32 xdp_txq = txq + vsi->num_xdp_txq; + + wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0); + } txq++; } @@ -2610,39 +2164,35 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; int base = vsi->base_vector; + int i; - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - int i; - - if (!vsi->q_vectors || !vsi->irqs_ready) - return; + if (!vsi->q_vectors || !vsi->irqs_ready) + return; - ice_vsi_release_msix(vsi); - if (vsi->type == ICE_VSI_VF) - return; + ice_vsi_release_msix(vsi); + if (vsi->type == ICE_VSI_VF) + return; - vsi->irqs_ready = false; - ice_for_each_q_vector(vsi, i) { - u16 vector = i + base; - int irq_num; + vsi->irqs_ready = false; + ice_for_each_q_vector(vsi, i) { + u16 vector = i + base; + int irq_num; - irq_num = pf->msix_entries[vector].vector; + irq_num = pf->msix_entries[vector].vector; - /* free only the irqs that were actually requested */ - if (!vsi->q_vectors[i] || - !(vsi->q_vectors[i]->num_ring_tx || - vsi->q_vectors[i]->num_ring_rx)) - continue; + /* free only the irqs that were actually requested */ + if (!vsi->q_vectors[i] || + !(vsi->q_vectors[i]->num_ring_tx || + vsi->q_vectors[i]->num_ring_rx)) + continue; - /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); + /* clear the affinity notifier in the IRQ descriptor */ + irq_set_affinity_notifier(irq_num, NULL); - /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(irq_num, NULL); - synchronize_irq(irq_num); - devm_free_irq(&pf->pdev->dev, irq_num, - vsi->q_vectors[i]); - } + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(irq_num, NULL); + synchronize_irq(irq_num); + devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]); } } @@ -2693,6 +2243,62 @@ void ice_vsi_close(struct ice_vsi *vsi) } /** + * ice_ena_vsi - resume a VSI + * @vsi: the VSI being resume + * @locked: is the rtnl_lock already held + */ +int ice_ena_vsi(struct ice_vsi *vsi, bool locked) +{ + int err = 0; + + if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + return 0; + + clear_bit(__ICE_NEEDS_RESTART, vsi->state); + + if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (netif_running(vsi->netdev)) { + if (!locked) + rtnl_lock(); + + err = ice_open(vsi->netdev); + + if (!locked) + rtnl_unlock(); + } + } + + return err; +} + +/** + * ice_dis_vsi - pause a VSI + * @vsi: the VSI being paused + * @locked: is the rtnl_lock already held + */ +void ice_dis_vsi(struct ice_vsi *vsi, bool locked) +{ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + set_bit(__ICE_NEEDS_RESTART, vsi->state); + + if (vsi->type == ICE_VSI_PF && vsi->netdev) { + if (netif_running(vsi->netdev)) { + if (!locked) + rtnl_lock(); + + ice_stop(vsi->netdev); + + if (!locked) + rtnl_unlock(); + } else { + ice_vsi_close(vsi); + } + } +} + +/** * ice_free_res - free a block of resources * @res: pointer to the resource * @index: starting index previously returned by ice_get_res @@ -2772,8 +2378,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return -EINVAL; if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(&pf->pdev->dev, - "param err: needed=%d, num_entries = %d id=0x%04x\n", + dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n", needed, res->num_entries, id); return -EINVAL; } @@ -2821,15 +2426,20 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) } /* disable each interrupt */ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - ice_for_each_q_vector(vsi, i) - wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0); + ice_for_each_q_vector(vsi, i) { + if (!vsi->q_vectors[i]) + continue; + wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0); + } + + ice_flush(hw); - ice_flush(hw); + /* don't call synchronize_irq() for VF's from the host */ + if (vsi->type == ICE_VSI_VF) + return; - ice_for_each_q_vector(vsi, i) - synchronize_irq(pf->msix_entries[i + base].vector); - } + ice_for_each_q_vector(vsi, i) + synchronize_irq(pf->msix_entries[i + base].vector); } /** @@ -2889,14 +2499,16 @@ int ice_vsi_release(struct ice_vsi *vsi) pf->num_avail_sw_msix += vsi->num_q_vectors; } - if (vsi->type == ICE_VSI_PF) { - ice_vsi_add_rem_eth_mac(vsi, false); - ice_cfg_sw_lldp(vsi, true, false); - /* The Rx rule will only exist to remove if the LLDP FW - * engine is currently stopped - */ - if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) - ice_cfg_sw_lldp(vsi, false, false); + if (!ice_is_safe_mode(pf)) { + if (vsi->type == ICE_VSI_PF) { + ice_vsi_add_rem_eth_mac(vsi, false); + ice_cfg_sw_lldp(vsi, true, false); + /* The Rx rule will only exist to remove if the LLDP FW + * engine is currently stopped + */ + if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + ice_cfg_sw_lldp(vsi, false, false); + } } ice_remove_vsi_fltr(&pf->hw, vsi->idx); @@ -2913,8 +2525,6 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_clear_rings(vsi); ice_vsi_put_qs(vsi); - pf->q_left_tx += vsi->alloc_txq; - pf->q_left_rx += vsi->alloc_rxq; /* retain SW VSI data structure since it is needed to unregister and * free VSI netdev when PF is not in reset recovery pending state,\ @@ -2927,14 +2537,108 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** + * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @coalesce: pointer to array of struct with stored coalesce + * + * Set coalesce param in q_vector and update these parameters in HW. + */ +static void +ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, + struct ice_coalesce_stored *coalesce) +{ + struct ice_ring_container *rx_rc = &q_vector->rx; + struct ice_ring_container *tx_rc = &q_vector->tx; + struct ice_hw *hw = &q_vector->vsi->back->hw; + + tx_rc->itr_setting = coalesce->itr_tx; + rx_rc->itr_setting = coalesce->itr_rx; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) + wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(tx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) + wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + + q_vector->intrl = coalesce->intrl; + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); +} + +/** + * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors + * @vsi: VSI connected with q_vectors + * @coalesce: array of struct with stored coalesce + * + * Returns array size. + */ +static int +ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce) +{ + int i; + + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + coalesce[i].itr_tx = q_vector->tx.itr_setting; + coalesce[i].itr_rx = q_vector->rx.itr_setting; + coalesce[i].intrl = q_vector->intrl; + } + + return vsi->num_q_vectors; +} + +/** + * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays + * @vsi: VSI connected with q_vectors + * @coalesce: pointer to array of struct with stored coalesce + * @size: size of coalesce array + * + * Before this function, ice_vsi_rebuild_get_coalesce should be called to save + * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce + * to default value. + */ +static void +ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce, int size) +{ + int i; + + if ((size && !coalesce) || !vsi) + return; + + for (i = 0; i < size && i < vsi->num_q_vectors; i++) + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce[i]); + + for (; i < vsi->num_q_vectors; i++) { + struct ice_coalesce_stored coalesce_dflt = { + .itr_tx = ICE_DFLT_TX_ITR, + .itr_rx = ICE_DFLT_RX_ITR, + .intrl = 0 + }; + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce_dflt); + } +} + +/** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild + * @init_vsi: is this an initialization or a reconfigure of the VSI * * Returns 0 on success and negative value on failure */ -int ice_vsi_rebuild(struct ice_vsi *vsi) +int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_coalesce_stored *coalesce; + int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; enum ice_status status; struct ice_pf *pf; @@ -2947,6 +2651,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (coalesce) + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, + coalesce); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); @@ -2962,21 +2671,28 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) vsi->base_vector = 0; } + if (ice_is_xdp_ena_vsi(vsi)) + /* return value check can be skipped here, it always returns + * 0 if reset is in progress + */ + ice_destroy_xdp_rings(vsi); + ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); - ice_dev_onetime_setup(&pf->hw); if (vsi->type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf->vf_id); else ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); - ice_vsi_set_tc_cfg(vsi); - /* Initialize VSI struct elements and create VSI in FW */ - ret = ice_vsi_init(vsi); + ret = ice_vsi_alloc_arrays(vsi); if (ret < 0) goto err_vsi; - ret = ice_vsi_alloc_arrays(vsi); + ice_vsi_get_qs(vsi); + ice_vsi_set_tc_cfg(vsi); + + /* Initialize VSI struct elements and create VSI in FW */ + ret = ice_vsi_init(vsi, init_vsi); if (ret < 0) goto err_vsi; @@ -2986,6 +2702,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (ret) goto err_rings; + ret = ice_vsi_setup_vector_base(vsi); + if (ret) + goto err_vectors; + ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto err_vectors; @@ -2995,6 +2715,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) goto err_vectors; ice_vsi_map_rings_to_vectors(vsi); + if (ice_is_xdp_ena_vsi(vsi)) { + vsi->num_xdp_txq = vsi->alloc_txq; + ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); + if (ret) + goto err_vectors; + } /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. Hence no need to capture * return value @@ -3007,10 +2733,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (ret) goto err_rings; - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto err_vectors; - ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto err_vectors; @@ -3019,25 +2741,34 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (ret) goto err_vectors; - pf->q_left_tx -= vsi->alloc_txq; - pf->q_left_rx -= vsi->alloc_rxq; break; default: break; } /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = pf->num_lan_tx; + for (i = 0; i < vsi->tc_cfg.numtc; i++) { + max_txqs[i] = vsi->alloc_txq; + + if (ice_is_xdp_ena_vsi(vsi)) + max_txqs[i] += vsi->num_xdp_txq; + } status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(&pf->pdev->dev, - "VSI %d failed lan queue config, error %d\n", + dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n", vsi->vsi_num, status); - goto err_vectors; + if (init_vsi) { + ret = -EIO; + goto err_vectors; + } else { + return ice_schedule_reset(pf, ICE_RESET_PFR); + } } + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); + kfree(coalesce); + return 0; err_vectors: @@ -3052,6 +2783,7 @@ err_rings: err_vsi: ice_vsi_clear(vsi); set_bit(__ICE_RESET_FAILED, pf->state); + kfree(coalesce); return ret; } @@ -3062,6 +2794,7 @@ err_vsi: bool ice_is_reset_in_progress(unsigned long *state) { return test_bit(__ICE_RESET_OICR_RECV, state) || + test_bit(__ICE_DCBNL_DEVRESET, state) || test_bit(__ICE_PFR_REQ, state) || test_bit(__ICE_CORER_REQ, state) || test_bit(__ICE_GLOBR_REQ, state); @@ -3083,48 +2816,6 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) } /** - * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration - * @vsi: the VSI being configured - * @ena_tc: TC map to be enabled - */ -static void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) -{ - struct net_device *netdev = vsi->netdev; - struct ice_pf *pf = vsi->back; - struct ice_dcbx_cfg *dcbcfg; - u8 netdev_tc; - int i; - - if (!netdev) - return; - - if (!ena_tc) { - netdev_reset_tc(netdev); - return; - } - - if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) - return; - - dcbcfg = &pf->hw.port_info->local_dcbx_cfg; - - ice_for_each_traffic_class(i) - if (vsi->tc_cfg.ena_tc & BIT(i)) - netdev_set_tc_queue(netdev, - vsi->tc_cfg.tc_info[i].netdev_tc, - vsi->tc_cfg.tc_info[i].qcount_tx, - vsi->tc_cfg.tc_info[i].qoffset); - - for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - u8 ets_tc = dcbcfg->etscfg.prio_table[i]; - - /* Get the mapped netdev TC# for the UP */ - netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; - netdev_set_prio_tc_map(netdev, i, netdev_tc); - } -} - -/** * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map * @vsi: VSI to be configured * @ena_tc: TC bitmap @@ -3134,24 +2825,27 @@ static void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - struct ice_vsi_ctx *ctx; struct ice_pf *pf = vsi->back; + struct ice_vsi_ctx *ctx; enum ice_status status; + struct device *dev; int i, ret = 0; u8 num_tc = 0; + dev = ice_pf_to_dev(pf); + ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ if (ena_tc & BIT(i)) num_tc++; /* populate max_txqs per TC */ - max_txqs[i] = pf->num_lan_tx; + max_txqs[i] = vsi->alloc_txq; } vsi->tc_cfg.ena_tc = ena_tc; vsi->tc_cfg.numtc = num_tc; - ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -3164,7 +2858,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); if (status) { - dev_info(&pf->pdev->dev, "Failed VSI Update\n"); + dev_info(dev, "Failed VSI Update\n"); ret = -EIO; goto out; } @@ -3173,8 +2867,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) max_txqs); if (status) { - dev_err(&pf->pdev->dev, - "VSI %d failed TC config, error %d\n", + dev_err(dev, "VSI %d failed TC config, error %d\n", vsi->vsi_num, status); ret = -EIO; goto out; @@ -3184,7 +2877,197 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ice_vsi_cfg_netdev_tc(vsi, ena_tc); out: - devm_kfree(&pf->pdev->dev, ctx); + kfree(ctx); return ret; } #endif /* CONFIG_DCB */ + +/** + * ice_update_ring_stats - Update ring statistics + * @ring: ring to update + * @cont: used to increment per-vector counters + * @pkts: number of processed packets + * @bytes: number of processed bytes + * + * This function assumes that caller has acquired a u64_stats_sync lock. + */ +static void +ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, + u64 pkts, u64 bytes) +{ + ring->stats.bytes += bytes; + ring->stats.pkts += pkts; + cont->total_bytes += bytes; + cont->total_pkts += pkts; +} + +/** + * ice_update_tx_ring_stats - Update Tx ring specific counters + * @tx_ring: ring to update + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) +{ + u64_stats_update_begin(&tx_ring->syncp); + ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes); + u64_stats_update_end(&tx_ring->syncp); +} + +/** + * ice_update_rx_ring_stats - Update Rx ring specific counters + * @rx_ring: ring to update + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) +{ + u64_stats_update_begin(&rx_ring->syncp); + ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes); + u64_stats_update_end(&rx_ring->syncp); +} + +/** + * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI + * @vsi: the VSI being configured MAC filter + * @macaddr: the MAC address to be added. + * @set: Add or delete a MAC filter + * + * Adds or removes MAC address filter entry for VF VSI + */ +enum ice_status +ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set) +{ + LIST_HEAD(tmp_add_list); + enum ice_status status; + + /* Update MAC filter list to be added or removed for a VSI */ + if (ice_add_mac_to_list(vsi, &tmp_add_list, macaddr)) { + status = ICE_ERR_NO_MEMORY; + goto cfg_mac_fltr_exit; + } + + if (set) + status = ice_add_mac(&vsi->back->hw, &tmp_add_list); + else + status = ice_remove_mac(&vsi->back->hw, &tmp_add_list); + +cfg_mac_fltr_exit: + ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list); + return status; +} + +/** + * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used + * @sw: switch to check if its default forwarding VSI is free + * + * Return true if the default forwarding VSI is already being used, else returns + * false signalling that it's available to use. + */ +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw) +{ + return (sw->dflt_vsi && sw->dflt_vsi_ena); +} + +/** + * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI + * @sw: switch for the default forwarding VSI to compare against + * @vsi: VSI to compare against default forwarding VSI + * + * If this VSI passed in is the default forwarding VSI then return true, else + * return false + */ +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena); +} + +/** + * ice_set_dflt_vsi - set the default forwarding VSI + * @sw: switch used to assign the default forwarding VSI + * @vsi: VSI getting set as the default forwarding VSI on the switch + * + * If the VSI passed in is already the default VSI and it's enabled just return + * success. + * + * If there is already a default VSI on the switch and it's enabled then return + * -EEXIST since there can only be one default VSI per switch. + * + * Otherwise try to set the VSI passed in as the switch's default VSI and + * return the result. + */ +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + enum ice_status status; + struct device *dev; + + if (!sw || !vsi) + return -EINVAL; + + dev = ice_pf_to_dev(vsi->back); + + /* the VSI passed in is already the default VSI */ + if (ice_is_vsi_dflt_vsi(sw, vsi)) { + dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n", + vsi->vsi_num); + return 0; + } + + /* another VSI is already the default VSI for this switch */ + if (ice_is_dflt_vsi_in_use(sw)) { + dev_err(dev, "Default forwarding VSI %d already in use, disable it and try again\n", + sw->dflt_vsi->vsi_num); + return -EEXIST; + } + + status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX); + if (status) { + dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n", + vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = vsi; + sw->dflt_vsi_ena = true; + + return 0; +} + +/** + * ice_clear_dflt_vsi - clear the default forwarding VSI + * @sw: switch used to clear the default VSI + * + * If the switch has no default VSI or it's not enabled then return error. + * + * Otherwise try to clear the default VSI and return the result. + */ +int ice_clear_dflt_vsi(struct ice_sw *sw) +{ + struct ice_vsi *dflt_vsi; + enum ice_status status; + struct device *dev; + + if (!sw) + return -EINVAL; + + dev = ice_pf_to_dev(sw->pf); + + dflt_vsi = sw->dflt_vsi; + + /* there is no default VSI configured */ + if (!ice_is_dflt_vsi_in_use(sw)) + return -ENODEV; + + status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false, + ICE_FLTR_RX); + if (status) { + dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n", + dflt_vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = NULL; + sw->dflt_vsi_ena = false; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 6e43ef03bfc3..e2c0dadce920 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -6,8 +6,11 @@ #include "ice.h" -int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr); +const char *ice_vsi_type_str(enum ice_vsi_type type); + +int +ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, + const u8 *macaddr); void ice_free_fltr_list(struct device *dev, struct list_head *h); @@ -19,14 +22,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); -#ifdef CONFIG_PCI_IOV -void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); - -void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); -#endif /* CONFIG_PCI_IOV */ - int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid); int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); @@ -43,6 +38,10 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num); +int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi); + +int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi); + int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); @@ -65,25 +64,21 @@ int ice_vsi_release(struct ice_vsi *vsi); void ice_vsi_close(struct ice_vsi *vsi); +int ice_ena_vsi(struct ice_vsi *vsi, bool locked); + +void ice_dis_vsi(struct ice_vsi *vsi, bool locked); + int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id); int ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); -int ice_vsi_rebuild(struct ice_vsi *vsi); +int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi); bool ice_is_reset_in_progress(unsigned long *state); -void ice_vsi_free_q_vectors(struct ice_vsi *vsi); - -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); - void ice_vsi_put_qs(struct ice_vsi *vsi); -#ifdef CONFIG_DCB -void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); -#endif /* CONFIG_DCB */ - void ice_vsi_dis_irq(struct ice_vsi *vsi); void ice_vsi_free_irq(struct ice_vsi *vsi); @@ -94,5 +89,24 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); + +void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); + +void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); + u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); + +enum ice_status +ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); + +bool ice_is_safe_mode(struct ice_pf *pf); + +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw); + +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_clear_dflt_vsi(struct ice_sw *sw); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 63db08d9bafa..5ef28052c0f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,19 +6,32 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" -#define DRV_VERSION "0.7.4-k" +#define DRV_VERSION_MAJOR 0 +#define DRV_VERSION_MINOR 8 +#define DRV_VERSION_BUILD 2 + +#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ + __stringify(DRV_VERSION_MINOR) "." \ + __stringify(DRV_VERSION_BUILD) "-k" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" const char ice_drv_ver[] = DRV_VERSION; static const char ice_driver_string[] = DRV_SUMMARY; static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; +/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */ +#define ICE_DDP_PKG_PATH "intel/ice/ddp/" +#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg" + MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_LICENSE("GPL v2"); MODULE_VERSION(DRV_VERSION); +MODULE_FIRMWARE(ICE_DDP_PKG_FILE); static int debug = -1; module_param(debug, int, 0644); @@ -29,24 +42,24 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ static struct workqueue_struct *ice_wq; +static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; +static int ice_vsi_open(struct ice_vsi *vsi); -static void ice_rebuild(struct ice_pf *pf); +static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); static void ice_vsi_release_all(struct ice_pf *pf); -static void ice_update_vsi_stats(struct ice_vsi *vsi); -static void ice_update_pf_stats(struct ice_pf *pf); /** * ice_get_tx_pending - returns number of Tx descriptors not processed * @ring: the ring of descriptors */ -static u32 ice_get_tx_pending(struct ice_ring *ring) +static u16 ice_get_tx_pending(struct ice_ring *ring) { - u32 head, tail; + u16 head, tail; head = ring->next_to_clean; - tail = readl(ring->tail); + tail = ring->next_to_use; if (head != tail) return (head < tail) ? @@ -118,12 +131,11 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) */ static int ice_init_mac_fltr(struct ice_pf *pf) { - LIST_HEAD(tmp_add_list); + enum ice_status status; u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; - int status; - vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF); + vsi = ice_get_main_vsi(pf); if (!vsi) return -EINVAL; @@ -132,8 +144,7 @@ static int ice_init_mac_fltr(struct ice_pf *pf) */ /* Add a unicast MAC filter so the VSI can get its packets */ - status = ice_add_mac_to_list(vsi, &tmp_add_list, - vsi->port_info->mac.perm_addr); + status = ice_vsi_cfg_mac_fltr(vsi, vsi->port_info->mac.perm_addr, true); if (status) goto unregister; @@ -141,32 +152,24 @@ static int ice_init_mac_fltr(struct ice_pf *pf) * MAC address to the list as well. */ eth_broadcast_addr(broadcast); - status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); + status = ice_vsi_cfg_mac_fltr(vsi, broadcast, true); if (status) - goto free_mac_list; - - /* Program MAC filters for entries in tmp_add_list */ - status = ice_add_mac(&pf->hw, &tmp_add_list); - if (status) - status = -ENOMEM; - -free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + goto unregister; + return 0; unregister: /* We aren't useful with no MAC filters, so unregister if we * had an error */ if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(&pf->pdev->dev, - "Could not add MAC filters error %d. Unregistering device\n", + dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. Unregistering device\n", status); unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } - return status; + return -EIO; } /** @@ -265,7 +268,7 @@ static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc) */ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; + struct device *dev = ice_pf_to_dev(vsi->back); struct net_device *netdev = vsi->netdev; bool promisc_forced_on = false; struct ice_pf *pf = vsi->back; @@ -331,8 +334,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; - netdev_warn(netdev, - "Reached MAC filter limit, forcing promisc mode on VSI %d\n", + netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n", vsi->vsi_num); } else { err = -EIO; @@ -375,25 +377,27 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, true, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error setting default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags &= ~IFF_PROMISC; - err = -EIO; - goto out_promisc; + if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { + err = ice_set_dflt_vsi(pf->first_sw, vsi); + if (err && err != -EEXIST) { + netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags &= + ~IFF_PROMISC; + goto out_promisc; + } } } else { /* Clear Rx filter to remove traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, false, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error clearing default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags |= IFF_PROMISC; - err = -EIO; - goto out_promisc; + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) { + err = ice_clear_dflt_vsi(pf->first_sw); + if (err) { + netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags |= + IFF_PROMISC; + goto out_promisc; + } } } } @@ -434,42 +438,11 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf) } /** - * ice_dis_vsi - pause a VSI - * @vsi: the VSI being paused - * @locked: is the rtnl_lock already held - */ -static void ice_dis_vsi(struct ice_vsi *vsi, bool locked) -{ - if (test_bit(__ICE_DOWN, vsi->state)) - return; - - set_bit(__ICE_NEEDS_RESTART, vsi->state); - - if (vsi->type == ICE_VSI_PF && vsi->netdev) { - if (netif_running(vsi->netdev)) { - if (!locked) { - rtnl_lock(); - vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); - rtnl_unlock(); - } else { - vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); - } - } else { - ice_vsi_close(vsi); - } - } -} - -/** * ice_pf_dis_all_vsi - Pause all VSIs on a PF * @pf: the PF * @locked: is the rtnl_lock already held */ -#ifdef CONFIG_DCB -void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) -#else static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) -#endif /* CONFIG_DCB */ { int v; @@ -488,6 +461,7 @@ static void ice_prepare_for_reset(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; + int i; /* already prepared for reset */ if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) @@ -497,6 +471,12 @@ ice_prepare_for_reset(struct ice_pf *pf) if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); + /* Disable VFs until reset is completed */ + ice_for_each_vf(pf, i) + ice_set_vf_state_qs_dis(&pf->vf[i]); + + /* clear SW filtering DB */ + ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ ice_pf_dis_all_vsi(pf, false); @@ -516,7 +496,7 @@ ice_prepare_for_reset(struct ice_pf *pf) */ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); @@ -542,7 +522,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) */ if (reset_type == ICE_RESET_PFR) { pf->pfr_count++; - ice_rebuild(pf); + ice_rebuild(pf, reset_type); clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); clear_bit(__ICE_PFR_REQ, pf->state); ice_reset_all_vfs(pf, true); @@ -573,6 +553,8 @@ static void ice_reset_subtask(struct ice_pf *pf) reset_type = ICE_RESET_CORER; if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) reset_type = ICE_RESET_GLOBR; + if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state)) + reset_type = ICE_RESET_EMPR; /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) return; @@ -584,7 +566,7 @@ static void ice_reset_subtask(struct ice_pf *pf) } else { /* done with reset. start rebuild */ pf->hw.reset_ongoing = false; - ice_rebuild(pf); + ice_rebuild(pf, reset_type); /* clear bit to resume normal operations, but * ICE_NEEDS_RESTART bit is set in case rebuild failed */ @@ -618,6 +600,28 @@ static void ice_reset_subtask(struct ice_pf *pf) } /** + * ice_print_topo_conflict - print topology conflict message + * @vsi: the VSI whose topology status is being checked + */ +static void ice_print_topo_conflict(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.topo_media_conflict) { + case ICE_AQ_LINK_TOPO_CONFLICT: + case ICE_AQ_LINK_MEDIA_CONFLICT: + case ICE_AQ_LINK_TOPO_UNREACH_PRT: + case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: + case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: + netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + break; + case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: + netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + break; + default: + break; + } +} + +/** * ice_print_link_msg - print link up or down message * @vsi: the VSI whose link status is being queried * @isup: boolean for if the link is now up or down @@ -630,6 +634,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) const char *speed; const char *fec; const char *fc; + const char *an; if (!vsi) return; @@ -713,8 +718,14 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) break; } + /* check if autoneg completed, might be false due to not supported */ + if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) + an = "True"; + else + an = "False"; + /* Get FEC mode requested based on PHY caps last SW configuration */ - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) { fec_req = "Unknown"; goto done; @@ -734,11 +745,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) else fec_req = "NONE"; - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); done: - netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n", - speed, fec_req, fec, fc); + netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", + speed, fec_req, fec, an, fc); + ice_print_topo_conflict(vsi); } /** @@ -781,6 +793,7 @@ static int ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, u16 link_speed) { + struct device *dev = ice_pf_to_dev(pf); struct ice_phy_info *phy_info; struct ice_vsi *vsi; u16 old_link_speed; @@ -798,23 +811,35 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, */ result = ice_update_link_info(pi); if (result) - dev_dbg(&pf->pdev->dev, - "Failed to update link status and re-enable link events for port %d\n", + dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", pi->lport); /* if the old link up/down and speed is the same as the new */ if (link_up == old_link && link_speed == old_link_speed) return result; - vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF); + vsi = ice_get_main_vsi(pf); if (!vsi || !vsi->port_info) return -EINVAL; + /* turn off PHY if media was removed */ + if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) && + !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) { + set_bit(ICE_FLAG_NO_MEDIA, pf->flags); + + result = ice_aq_set_link_restart_an(pi, false, NULL); + if (result) { + dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", + vsi->vsi_num, result); + return result; + } + } + + ice_dcb_rebuild(pf); ice_vsi_link_event(vsi, link_up); ice_print_link_msg(vsi, link_up); - if (pf->num_alloc_vfs) - ice_vc_notify_link_state(pf); + ice_vc_notify_link_state(pf); return result; } @@ -862,15 +887,13 @@ static int ice_init_link_events(struct ice_port_info *pi) ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to set link event mask for port %d\n", + dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n", pi->lport); return -EIO; } if (ice_aq_get_link_info(pi, true, NULL, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to enable link events for port %d\n", + dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n", pi->lport); return -EIO; } @@ -899,8 +922,8 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) !!(link_data->link_info & ICE_AQ_LINK_UP), le16_to_cpu(link_data->link_speed)); if (status) - dev_dbg(&pf->pdev->dev, - "Could not process link event, error %d\n", status); + dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n", + status); return status; } @@ -912,6 +935,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) */ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) { + struct device *dev = ice_pf_to_dev(pf); struct ice_rq_event_info event; struct ice_hw *hw = &pf->hw; struct ice_ctl_q_info *cq; @@ -933,8 +957,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) qtype = "Mailbox"; break; default: - dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n", - q_type); + dev_warn(dev, "Unknown control queue type 0x%x\n", q_type); return 0; } @@ -946,16 +969,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) PF_FW_ARQLEN_ARQCRIT_M)) { oldval = val; if (val & PF_FW_ARQLEN_ARQVFE_M) - dev_dbg(&pf->pdev->dev, - "%s Receive Queue VF Error detected\n", qtype); + dev_dbg(dev, "%s Receive Queue VF Error detected\n", + qtype); if (val & PF_FW_ARQLEN_ARQOVFL_M) { - dev_dbg(&pf->pdev->dev, - "%s Receive Queue Overflow Error detected\n", + dev_dbg(dev, "%s Receive Queue Overflow Error detected\n", qtype); } if (val & PF_FW_ARQLEN_ARQCRIT_M) - dev_dbg(&pf->pdev->dev, - "%s Receive Queue Critical Error detected\n", + dev_dbg(dev, "%s Receive Queue Critical Error detected\n", qtype); val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | PF_FW_ARQLEN_ARQCRIT_M); @@ -968,16 +989,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) PF_FW_ATQLEN_ATQCRIT_M)) { oldval = val; if (val & PF_FW_ATQLEN_ATQVFE_M) - dev_dbg(&pf->pdev->dev, - "%s Send Queue VF Error detected\n", qtype); + dev_dbg(dev, "%s Send Queue VF Error detected\n", + qtype); if (val & PF_FW_ATQLEN_ATQOVFL_M) { - dev_dbg(&pf->pdev->dev, - "%s Send Queue Overflow Error detected\n", + dev_dbg(dev, "%s Send Queue Overflow Error detected\n", qtype); } if (val & PF_FW_ATQLEN_ATQCRIT_M) - dev_dbg(&pf->pdev->dev, - "%s Send Queue Critical Error detected\n", + dev_dbg(dev, "%s Send Queue Critical Error detected\n", qtype); val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | PF_FW_ATQLEN_ATQCRIT_M); @@ -986,8 +1005,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) } event.buf_len = cq->rq_buf_size; - event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len, - GFP_KERNEL); + event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) return 0; @@ -999,8 +1017,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) if (ret == ICE_ERR_AQ_NO_WORK) break; if (ret) { - dev_err(&pf->pdev->dev, - "%s Receive Queue event error %d\n", qtype, + dev_err(dev, "%s Receive Queue event error %d\n", qtype, ret); break; } @@ -1010,8 +1027,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) switch (opcode) { case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf, &event)) - dev_err(&pf->pdev->dev, - "Could not handle link event\n"); + dev_err(dev, "Could not handle link event\n"); break; case ice_mbx_opc_send_msg_to_pf: ice_vc_process_vf_msg(pf, &event); @@ -1023,14 +1039,13 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_dcb_process_lldp_set_mib_change(pf, &event); break; default: - dev_dbg(&pf->pdev->dev, - "%s Receive Queue unknown event 0x%04x ignored\n", + dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n", qtype, opcode); break; } } while (pending && (i++ < ICE_DFLT_IRQ_WORK)); - devm_kfree(&pf->pdev->dev, event.msg_buf); + kfree(event.msg_buf); return pending && (i == ICE_DFLT_IRQ_WORK); } @@ -1174,6 +1189,7 @@ static void ice_service_timer(struct timer_list *t) */ static void ice_handle_mdd_event(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool mdd_detected = false; u32 reg; @@ -1195,7 +1211,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) GL_MDET_TX_PQM_QNUM_S); if (netif_msg_tx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_PQM, 0xffffffff); mdd_detected = true; @@ -1212,8 +1228,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S); - if (netif_msg_rx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + if (netif_msg_tx_err(pf)) + dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); mdd_detected = true; @@ -1231,7 +1247,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) GL_MDET_RX_QNUM_S); if (netif_msg_rx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", + dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_RX, 0xffffffff); mdd_detected = true; @@ -1243,21 +1259,21 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, PF_MDET_TX_PQM); if (reg & PF_MDET_TX_PQM_VALID_M) { wr32(hw, PF_MDET_TX_PQM, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + dev_info(dev, "TX driver issue detected, PF reset issued\n"); pf_mdd_detected = true; } reg = rd32(hw, PF_MDET_TX_TCLAN); if (reg & PF_MDET_TX_TCLAN_VALID_M) { wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + dev_info(dev, "TX driver issue detected, PF reset issued\n"); pf_mdd_detected = true; } reg = rd32(hw, PF_MDET_RX); if (reg & PF_MDET_RX_VALID_M) { wr32(hw, PF_MDET_RX, 0xFFFF); - dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n"); + dev_info(dev, "RX driver issue detected, PF reset issued\n"); pf_mdd_detected = true; } /* Queue belongs to the PF initiate a reset */ @@ -1268,7 +1284,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } /* check to see if one of the VFs caused the MDD */ - for (i = 0; i < pf->num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { struct ice_vf *vf = &pf->vf[i]; bool vf_mdd_detected = false; @@ -1277,7 +1293,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1285,7 +1301,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1293,7 +1309,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1301,20 +1317,138 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", + dev_info(dev, "RX driver issue detected on VF %d\n", i); } if (vf_mdd_detected) { vf->num_mdd_events++; - if (vf->num_mdd_events > 1) - dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n", + if (vf->num_mdd_events && + vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD) + dev_info(dev, "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n", i, vf->num_mdd_events); } } } /** + * ice_force_phys_link_state - Force the physical link state + * @vsi: VSI to force the physical link state to up/down + * @link_up: true/false indicates to set the physical link to up/down + * + * Force the physical link state by getting the current PHY capabilities from + * hardware and setting the PHY config based on the determined capabilities. If + * link changes a link event will be triggered because both the Enable Automatic + * Link Update and LESM Enable bits are set when setting the PHY capabilities. + * + * Returns 0 on success, negative on failure + */ +static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) +{ + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_aqc_set_phy_cfg_data *cfg; + struct ice_port_info *pi; + struct device *dev; + int retcode; + + if (!vsi || !vsi->port_info || !vsi->back) + return -EINVAL; + if (vsi->type != ICE_VSI_PF) + return 0; + + dev = ice_pf_to_dev(vsi->back); + + pi = vsi->port_info; + + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return -ENOMEM; + + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + NULL); + if (retcode) { + dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", + vsi->vsi_num, retcode); + retcode = -EIO; + goto out; + } + + /* No change in link */ + if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) && + link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) + goto out; + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + retcode = -ENOMEM; + goto out; + } + + cfg->phy_type_low = pcaps->phy_type_low; + cfg->phy_type_high = pcaps->phy_type_high; + cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; + cfg->low_power_ctrl = pcaps->low_power_ctrl; + cfg->eee_cap = pcaps->eee_cap; + cfg->eeer_value = pcaps->eeer_value; + cfg->link_fec_opt = pcaps->link_fec_options; + if (link_up) + cfg->caps |= ICE_AQ_PHY_ENA_LINK; + else + cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; + + retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL); + if (retcode) { + dev_err(dev, "Failed to set phy config, VSI %d error %d\n", + vsi->vsi_num, retcode); + retcode = -EIO; + } + + kfree(cfg); +out: + kfree(pcaps); + return retcode; +} + +/** + * ice_check_media_subtask - Check for media; bring link up if detected. + * @pf: pointer to PF struct + */ +static void ice_check_media_subtask(struct ice_pf *pf) +{ + struct ice_port_info *pi; + struct ice_vsi *vsi; + int err; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return; + + /* No need to check for media if it's already present or the interface + * is down + */ + if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) || + test_bit(__ICE_DOWN, vsi->state)) + return; + + /* Refresh link info and check if media is present */ + pi = vsi->port_info; + err = ice_update_link_info(pi); + if (err) + return; + + if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + err = ice_force_phys_link_state(vsi, true); + if (err) + return; + clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); + + /* A Link Status Event will be generated; the event handler + * will complete bringing the interface up + */ + } +} + +/** * ice_service_task - manage and run subtasks * @work: pointer to work_struct contained by the PF struct */ @@ -1336,12 +1470,19 @@ static void ice_service_task(struct work_struct *work) return; } + ice_clean_adminq_subtask(pf); + ice_check_media_subtask(pf); ice_check_for_hang_subtask(pf); ice_sync_fltr_subtask(pf); ice_handle_mdd_event(pf); - ice_process_vflr_event(pf); ice_watchdog_subtask(pf); - ice_clean_adminq_subtask(pf); + + if (ice_is_safe_mode(pf)) { + ice_service_task_complete(pf); + return; + } + + ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ @@ -1369,13 +1510,51 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->adminq.num_sq_entries = ICE_AQ_LEN; hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; - hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN; - hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN; + hw->mailboxq.num_rq_entries = ICE_MBXRQ_LEN; + hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; } /** + * ice_schedule_reset - schedule a reset + * @pf: board private structure + * @reset: reset being requested + */ +int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) +{ + struct device *dev = ice_pf_to_dev(pf); + + /* bail out if earlier reset has failed */ + if (test_bit(__ICE_RESET_FAILED, pf->state)) { + dev_dbg(dev, "earlier reset has failed\n"); + return -EIO; + } + /* bail if reset/recovery already in progress */ + if (ice_is_reset_in_progress(pf->state)) { + dev_dbg(dev, "Reset already in progress\n"); + return -EBUSY; + } + + switch (reset) { + case ICE_RESET_PFR: + set_bit(__ICE_PFR_REQ, pf->state); + break; + case ICE_RESET_CORER: + set_bit(__ICE_CORER_REQ, pf->state); + break; + case ICE_RESET_GLOBR: + set_bit(__ICE_GLOBR_REQ, pf->state); + break; + default: + return -EINVAL; + } + + ice_service_task_schedule(pf); + return 0; +} + +/** * ice_irq_affinity_notify - Callback for affinity changes * @notify: context as to what irq was changed * @mask: the new affinity mask @@ -1409,15 +1588,11 @@ static void ice_irq_affinity_release(struct kref __always_unused *ref) {} */ static int ice_vsi_ena_irq(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - int i; + struct ice_hw *hw = &vsi->back->hw; + int i; - ice_for_each_q_vector(vsi, i) - ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]); - } + ice_for_each_q_vector(vsi, i) + ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]); ice_flush(hw); return 0; @@ -1433,11 +1608,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; int base = vsi->base_vector; + struct device *dev; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; int irq_num; + dev = ice_pf_to_dev(pf); for (vector = 0; vector < q_vectors; vector++) { struct ice_q_vector *q_vector = vsi->q_vectors[vector]; @@ -1457,12 +1634,11 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = devm_request_irq(&pf->pdev->dev, irq_num, - vsi->irq_handler, 0, + err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, q_vector->name, q_vector); if (err) { - netdev_err(vsi->netdev, - "MSIX request_irq failed, error: %d\n", err); + netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", + err); goto free_q_irqs; } @@ -1484,12 +1660,331 @@ free_q_irqs: irq_num = pf->msix_entries[base + vector].vector, irq_set_affinity_notifier(irq_num, NULL); irq_set_affinity_hint(irq_num, NULL); - devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]); + devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]); } return err; } /** + * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP + * @vsi: VSI to setup Tx rings used by XDP + * + * Return 0 on success and negative value on error + */ +static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + int i; + + for (i = 0; i < vsi->num_xdp_txq; i++) { + u16 xdp_q_idx = vsi->alloc_txq + i; + struct ice_ring *xdp_ring; + + xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); + + if (!xdp_ring) + goto free_xdp_rings; + + xdp_ring->q_index = xdp_q_idx; + xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; + xdp_ring->ring_active = false; + xdp_ring->vsi = vsi; + xdp_ring->netdev = NULL; + xdp_ring->dev = dev; + xdp_ring->count = vsi->num_tx_desc; + vsi->xdp_rings[i] = xdp_ring; + if (ice_setup_tx_ring(xdp_ring)) + goto free_xdp_rings; + ice_set_ring_xdp(xdp_ring); + xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + } + + return 0; + +free_xdp_rings: + for (; i >= 0; i--) + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + ice_free_tx_ring(vsi->xdp_rings[i]); + return -ENOMEM; +} + +/** + * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI + * @vsi: VSI to set the bpf prog on + * @prog: the bpf prog pointer + */ +static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + int i; + + old_prog = xchg(&vsi->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + ice_for_each_rxq(vsi, i) + WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); +} + +/** + * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP + * @vsi: VSI to bring up Tx rings used by XDP + * @prog: bpf program that will be assigned to VSI + * + * Return 0 on success and negative value on error + */ +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + int xdp_rings_rem = vsi->num_xdp_txq; + struct ice_pf *pf = vsi->back; + struct ice_qs_cfg xdp_qs_cfg = { + .qs_mutex = &pf->avail_q_mutex, + .pf_map = pf->avail_txqs, + .pf_map_size = pf->max_pf_txqs, + .q_count = vsi->num_xdp_txq, + .scatter_count = ICE_MAX_SCATTER_TXQS, + .vsi_map = vsi->txq_map, + .vsi_map_offset = vsi->alloc_txq, + .mapping_mode = ICE_VSI_MAP_CONTIG + }; + enum ice_status status; + struct device *dev; + int i, v_idx; + + dev = ice_pf_to_dev(pf); + vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq, + sizeof(*vsi->xdp_rings), GFP_KERNEL); + if (!vsi->xdp_rings) + return -ENOMEM; + + vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode; + if (__ice_vsi_get_qs(&xdp_qs_cfg)) + goto err_map_xdp; + + if (ice_xdp_alloc_setup_rings(vsi)) + goto clear_xdp_rings; + + /* follow the logic from ice_vsi_map_rings_to_vectors */ + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + int xdp_rings_per_v, q_id, q_base; + + xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem, + vsi->num_q_vectors - v_idx); + q_base = vsi->num_xdp_txq - xdp_rings_rem; + + for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_id]; + + xdp_ring->q_vector = q_vector; + xdp_ring->next = q_vector->tx.ring; + q_vector->tx.ring = xdp_ring; + } + xdp_rings_rem -= xdp_rings_per_v; + } + + /* omit the scheduler update if in reset path; XDP queues will be + * taken into account at the end of ice_vsi_rebuild, where + * ice_cfg_vsi_lan is being called + */ + if (ice_is_reset_in_progress(pf->state)) + return 0; + + /* tell the Tx scheduler that right now we have + * additional queues + */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq; + + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { + dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n", + status); + goto clear_xdp_rings; + } + ice_vsi_assign_bpf_prog(vsi, prog); + + return 0; +clear_xdp_rings: + for (i = 0; i < vsi->num_xdp_txq; i++) + if (vsi->xdp_rings[i]) { + kfree_rcu(vsi->xdp_rings[i], rcu); + vsi->xdp_rings[i] = NULL; + } + +err_map_xdp: + mutex_lock(&pf->avail_q_mutex); + for (i = 0; i < vsi->num_xdp_txq; i++) { + clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); + vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; + } + mutex_unlock(&pf->avail_q_mutex); + + devm_kfree(dev, vsi->xdp_rings); + return -ENOMEM; +} + +/** + * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings + * @vsi: VSI to remove XDP rings + * + * Detach XDP rings from irq vectors, clean up the PF bitmap and free + * resources + */ +int ice_destroy_xdp_rings(struct ice_vsi *vsi) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_pf *pf = vsi->back; + int i, v_idx; + + /* q_vectors are freed in reset path so there's no point in detaching + * rings; in case of rebuild being triggered not from reset reset bits + * in pf->state won't be set, so additionally check first q_vector + * against NULL + */ + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + goto free_qmap; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_ring *ring; + + ice_for_each_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.ring = ring; + } + +free_qmap: + mutex_lock(&pf->avail_q_mutex); + for (i = 0; i < vsi->num_xdp_txq; i++) { + clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); + vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; + } + mutex_unlock(&pf->avail_q_mutex); + + for (i = 0; i < vsi->num_xdp_txq; i++) + if (vsi->xdp_rings[i]) { + if (vsi->xdp_rings[i]->desc) + ice_free_tx_ring(vsi->xdp_rings[i]); + kfree_rcu(vsi->xdp_rings[i], rcu); + vsi->xdp_rings[i] = NULL; + } + + devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings); + vsi->xdp_rings = NULL; + + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + return 0; + + ice_vsi_assign_bpf_prog(vsi, NULL); + + /* notify Tx scheduler that we destroyed XDP queues and bring + * back the old number of child nodes + */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); +} + +/** + * ice_xdp_setup_prog - Add or remove XDP eBPF program + * @vsi: VSI to setup XDP for + * @prog: XDP program + * @extack: netlink extended ack + */ +static int +ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; + bool if_running = netif_running(vsi->netdev); + int ret = 0, xdp_ring_err = 0; + + if (frame_size > vsi->rx_buf_len) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); + return -EOPNOTSUPP; + } + + /* need to stop netdev while setting up the program for Rx rings */ + if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + ret = ice_down(vsi); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, + "Preparing device for XDP attach failed"); + return ret; + } + } + + if (!ice_is_xdp_ena_vsi(vsi) && prog) { + vsi->num_xdp_txq = vsi->alloc_txq; + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, + "Setting up XDP Tx resources failed"); + } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_ring_err = ice_destroy_xdp_rings(vsi); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, + "Freeing XDP Tx resources failed"); + } else { + ice_vsi_assign_bpf_prog(vsi, prog); + } + + if (if_running) + ret = ice_up(vsi); + + if (!ret && prog && vsi->xsk_umems) { + int i; + + ice_for_each_rxq(vsi, i) { + struct ice_ring *rx_ring = vsi->rx_rings[i]; + + if (rx_ring->xsk_umem) + napi_schedule(&rx_ring->q_vector->napi); + } + } + + return (ret || xdp_ring_err) ? -ENOMEM : 0; +} + +/** + * ice_xdp - implements XDP handler + * @dev: netdevice + * @xdp: XDP command + */ +static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + + if (vsi->type != ICE_VSI_PF) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "XDP can be loaded only on PF VSI"); + return -EINVAL; + } + + switch (xdp->command) { + case XDP_SETUP_PROG: + return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; + return 0; + case XDP_SETUP_XSK_UMEM: + return ice_xsk_umem_setup(vsi, xdp->xsk.umem, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +/** * ice_ena_misc_vector - enable the non-queue interrupts * @pf: board private structure */ @@ -1527,8 +2022,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) struct ice_pf *pf = (struct ice_pf *)data; struct ice_hw *hw = &pf->hw; irqreturn_t ret = IRQ_NONE; + struct device *dev; u32 oicr, ena_mask; + dev = ice_pf_to_dev(pf); set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); @@ -1564,8 +2061,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) else if (reset == ICE_RESET_EMPR) pf->empr_count++; else - dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n", - reset); + dev_dbg(dev, "Invalid reset type %d\n", reset); /* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. @@ -1599,8 +2095,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_HMC_ERR_M) { ena_mask &= ~PFINT_OICR_HMC_ERR_M; - dev_dbg(&pf->pdev->dev, - "HMC Error interrupt - info 0x%x, data 0x%x\n", + dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n", rd32(hw, PFHMC_ERRORINFO), rd32(hw, PFHMC_ERRORDATA)); } @@ -1608,8 +2103,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* Report any remaining unexpected interrupts */ oicr &= ena_mask; if (oicr) { - dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n", - oicr); + dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr); /* If a critical error is pending there is no choice but to * reset the device. */ @@ -1665,9 +2159,9 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); ice_flush(hw); - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { + if (pf->msix_entries) { synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); - devm_free_irq(&pf->pdev->dev, + devm_free_irq(ice_pf_to_dev(pf), pf->msix_entries[pf->oicr_idx].vector, pf); } @@ -1711,13 +2205,13 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) */ static int ice_req_irq_msix_misc(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int oicr_idx, err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", - dev_driver_string(&pf->pdev->dev), - dev_name(&pf->pdev->dev)); + dev_driver_string(dev), dev_name(dev)); /* Do not request IRQ but do enable OICR interrupt since settings are * lost during reset. Note that this function is called only during @@ -1734,12 +2228,10 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) pf->num_avail_sw_msix -= 1; pf->oicr_idx = oicr_idx; - err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[pf->oicr_idx].vector, + err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); if (err) { - dev_err(&pf->pdev->dev, - "devm_request_irq for %s failed: %d\n", + dev_err(dev, "devm_request_irq for %s failed: %d\n", pf->int_name, err); ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); pf->num_avail_sw_msix += 1; @@ -1780,30 +2272,41 @@ static void ice_napi_add(struct ice_vsi *vsi) } /** - * ice_cfg_netdev - Allocate, configure and register a netdev - * @vsi: the VSI associated with the new netdev - * - * Returns 0 on success, negative value on failure + * ice_set_ops - set netdev and ethtools ops for the given netdev + * @netdev: netdev instance */ -static int ice_cfg_netdev(struct ice_vsi *vsi) +static void ice_set_ops(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if (ice_is_safe_mode(pf)) { + netdev->netdev_ops = &ice_netdev_safe_mode_ops; + ice_set_ethtool_safe_mode_ops(netdev); + return; + } + + netdev->netdev_ops = &ice_netdev_ops; + ice_set_ethtool_ops(netdev); +} + +/** + * ice_set_netdev_features - set features for the given netdev + * @netdev: netdev instance + */ +static void ice_set_netdev_features(struct net_device *netdev) { + struct ice_pf *pf = ice_netdev_to_pf(netdev); netdev_features_t csumo_features; netdev_features_t vlano_features; netdev_features_t dflt_features; netdev_features_t tso_features; - struct ice_netdev_priv *np; - struct net_device *netdev; - u8 mac_addr[ETH_ALEN]; - int err; - netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, - vsi->alloc_rxq); - if (!netdev) - return -ENOMEM; - - vsi->netdev = netdev; - np = netdev_priv(netdev); - np->vsi = vsi; + if (ice_is_safe_mode(pf)) { + /* safe mode */ + netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA; + netdev->hw_features = netdev->features; + return; + } dflt_features = NETIF_F_SG | NETIF_F_HIGHDMA | @@ -1818,7 +2321,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - tso_features = NETIF_F_TSO; + tso_features = NETIF_F_TSO | + NETIF_F_GSO_UDP_L4; /* set features that user can change */ netdev->hw_features = dflt_features | csumo_features | @@ -1831,25 +2335,50 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) tso_features; netdev->vlan_features |= dflt_features | csumo_features | tso_features; +} + +/** + * ice_cfg_netdev - Allocate, configure and register a netdev + * @vsi: the VSI associated with the new netdev + * + * Returns 0 on success, negative value on failure + */ +static int ice_cfg_netdev(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_netdev_priv *np; + struct net_device *netdev; + u8 mac_addr[ETH_ALEN]; + int err; + + netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, + vsi->alloc_rxq); + if (!netdev) + return -ENOMEM; + + vsi->netdev = netdev; + np = netdev_priv(netdev); + np->vsi = vsi; + + ice_set_netdev_features(netdev); + + ice_set_ops(netdev); if (vsi->type == ICE_VSI_PF) { - SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev); + SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); - ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); } netdev->priv_flags |= IFF_UNICAST_FLT; - /* assign netdev_ops */ - netdev->netdev_ops = &ice_netdev_ops; + /* Setup netdev TC information */ + ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; - ice_set_ethtool_ops(netdev); - netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU; @@ -2012,6 +2541,11 @@ static int ice_setup_pf_sw(struct ice_pf *pf) status = -ENODEV; goto unroll_vsi_setup; } + /* netdev has to be configured before setting frame size */ + ice_vsi_cfg_frame_size(vsi); + + /* Setup DCB netlink interface */ + ice_dcbnl_setup(vsi); /* registering the NAPI handler requires both the queues and * netdev to be created, which are done in ice_pf_vsi_setup() @@ -2041,36 +2575,48 @@ unroll_vsi_setup: ice_vsi_free_q_vectors(vsi); ice_vsi_delete(vsi); ice_vsi_put_qs(vsi); - pf->q_left_tx += vsi->alloc_txq; - pf->q_left_rx += vsi->alloc_rxq; ice_vsi_clear(vsi); } return status; } /** - * ice_determine_q_usage - Calculate queue distribution - * @pf: board private structure - * - * Return -ENOMEM if we don't get enough queues for all ports + * ice_get_avail_q_count - Get count of queues in use + * @pf_qmap: bitmap to get queue use count from + * @lock: pointer to a mutex that protects access to pf_qmap + * @size: size of the bitmap */ -static void ice_determine_q_usage(struct ice_pf *pf) +static u16 +ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size) { - u16 q_left_tx, q_left_rx; + u16 count = 0, bit; - q_left_tx = pf->hw.func_caps.common_cap.num_txq; - q_left_rx = pf->hw.func_caps.common_cap.num_rxq; + mutex_lock(lock); + for_each_clear_bit(bit, pf_qmap, size) + count++; + mutex_unlock(lock); - pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus()); + return count; +} - /* only 1 Rx queue unless RSS is enabled */ - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) - pf->num_lan_rx = 1; - else - pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus()); +/** + * ice_get_avail_txq_count - Get count of Tx queues in use + * @pf: pointer to an ice_pf instance + */ +u16 ice_get_avail_txq_count(struct ice_pf *pf) +{ + return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex, + pf->max_pf_txqs); +} - pf->q_left_tx = q_left_tx - pf->num_lan_tx; - pf->q_left_rx = q_left_rx - pf->num_lan_rx; +/** + * ice_get_avail_rxq_count - Get count of Rx queues in use + * @pf: pointer to an ice_pf instance + */ +u16 ice_get_avail_rxq_count(struct ice_pf *pf) +{ + return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex, + pf->max_pf_rxqs); } /** @@ -2081,44 +2627,75 @@ static void ice_deinit_pf(struct ice_pf *pf) { ice_service_task_stop(pf); mutex_destroy(&pf->sw_mutex); + mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); + + if (pf->avail_txqs) { + bitmap_free(pf->avail_txqs); + pf->avail_txqs = NULL; + } + + if (pf->avail_rxqs) { + bitmap_free(pf->avail_rxqs); + pf->avail_rxqs = NULL; + } } /** - * ice_init_pf - Initialize general software structures (struct ice_pf) - * @pf: board private structure to initialize + * ice_set_pf_caps - set PFs capability flags + * @pf: pointer to the PF instance */ -static void ice_init_pf(struct ice_pf *pf) +static void ice_set_pf_caps(struct ice_pf *pf) { - bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS); - set_bit(ICE_FLAG_MSIX_ENA, pf->flags); -#ifdef CONFIG_PCI_IOV - if (pf->hw.func_caps.common_cap.sr_iov_1_1) { - struct ice_hw *hw = &pf->hw; + struct ice_hw_func_caps *func_caps = &pf->hw.func_caps; + clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); + if (func_caps->common_cap.dcb) + set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); + if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); - pf->num_vfs_supported = min_t(int, hw->func_caps.num_allocd_vfs, + pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs, ICE_MAX_VF_COUNT); } -#endif /* CONFIG_PCI_IOV */ + clear_bit(ICE_FLAG_RSS_ENA, pf->flags); + if (func_caps->common_cap.rss_table_size) + set_bit(ICE_FLAG_RSS_ENA, pf->flags); - mutex_init(&pf->sw_mutex); - mutex_init(&pf->avail_q_mutex); + pf->max_pf_txqs = func_caps->common_cap.num_txq; + pf->max_pf_rxqs = func_caps->common_cap.num_rxq; +} - /* Clear avail_[t|r]x_qs bitmaps (set all to avail) */ - mutex_lock(&pf->avail_q_mutex); - bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS); - bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS); - mutex_unlock(&pf->avail_q_mutex); +/** + * ice_init_pf - Initialize general software structures (struct ice_pf) + * @pf: board private structure to initialize + */ +static int ice_init_pf(struct ice_pf *pf) +{ + ice_set_pf_caps(pf); - if (pf->hw.func_caps.common_cap.rss_table_size) - set_bit(ICE_FLAG_RSS_ENA, pf->flags); + mutex_init(&pf->sw_mutex); + mutex_init(&pf->tc_mutex); /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; INIT_WORK(&pf->serv_task, ice_service_task); clear_bit(__ICE_SERVICE_SCHED, pf->state); + + mutex_init(&pf->avail_q_mutex); + pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + if (!pf->avail_txqs) + return -ENOMEM; + + pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); + if (!pf->avail_rxqs) { + devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs); + pf->avail_txqs = NULL; + return -ENOMEM; + } + + return 0; } /** @@ -2130,6 +2707,7 @@ static void ice_init_pf(struct ice_pf *pf) */ static int ice_ena_msix_range(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); int v_left, v_actual, v_budget = 0; int needed, err, i; @@ -2137,15 +2715,20 @@ static int ice_ena_msix_range(struct ice_pf *pf) /* reserve one vector for miscellaneous handler */ needed = 1; + if (v_left < needed) + goto no_hw_vecs_left_err; v_budget += needed; v_left -= needed; /* reserve vectors for LAN traffic */ - pf->num_lan_msix = min_t(int, num_online_cpus(), v_left); - v_budget += pf->num_lan_msix; - v_left -= pf->num_lan_msix; + needed = min_t(int, num_online_cpus(), v_left); + if (v_left < needed) + goto no_hw_vecs_left_err; + pf->num_lan_msix = needed; + v_budget += needed; + v_left -= needed; - pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget, + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { @@ -2161,37 +2744,39 @@ static int ice_ena_msix_range(struct ice_pf *pf) ICE_MIN_MSIX, v_budget); if (v_actual < 0) { - dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n"); + dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; goto msix_err; } if (v_actual < v_budget) { - dev_warn(&pf->pdev->dev, - "not enough vectors. requested = %d, obtained = %d\n", + dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); - if (v_actual >= (pf->num_lan_msix + 1)) { - pf->num_avail_sw_msix = v_actual - - (pf->num_lan_msix + 1); - } else if (v_actual >= 2) { - pf->num_lan_msix = 1; - pf->num_avail_sw_msix = v_actual - 2; - } else { +/* 2 vectors for LAN (traffic + OICR) */ +#define ICE_MIN_LAN_VECS 2 + + if (v_actual < ICE_MIN_LAN_VECS) { + /* error if we can't get minimum vectors */ pci_disable_msix(pf->pdev); err = -ERANGE; goto msix_err; + } else { + pf->num_lan_msix = ICE_MIN_LAN_VECS; } } return v_actual; msix_err: - devm_kfree(&pf->pdev->dev, pf->msix_entries); + devm_kfree(dev, pf->msix_entries); goto exit_err; +no_hw_vecs_left_err: + dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", + needed, v_left); + err = -ERANGE; exit_err: pf->num_lan_msix = 0; - clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); return err; } @@ -2202,9 +2787,8 @@ exit_err: static void ice_dis_msix(struct ice_pf *pf) { pci_disable_msix(pf->pdev); - devm_kfree(&pf->pdev->dev, pf->msix_entries); + devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); pf->msix_entries = NULL; - clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); } /** @@ -2213,11 +2797,10 @@ static void ice_dis_msix(struct ice_pf *pf) */ static void ice_clear_interrupt_scheme(struct ice_pf *pf) { - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - ice_dis_msix(pf); + ice_dis_msix(pf); if (pf->irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->irq_tracker); + devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); pf->irq_tracker = NULL; } } @@ -2230,17 +2813,14 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) { int vectors; - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - vectors = ice_ena_msix_range(pf); - else - return -ENODEV; + vectors = ice_ena_msix_range(pf); if (vectors < 0) return vectors; /* set up vector assignment tracking */ pf->irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) + + devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) + (sizeof(u16) * vectors), GFP_KERNEL); if (!pf->irq_tracker) { ice_dis_msix(pf); @@ -2256,6 +2836,196 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** + * ice_vsi_recfg_qs - Change the number of queues on a VSI + * @vsi: VSI being changed + * @new_rx: new number of Rx queues + * @new_tx: new number of Tx queues + * + * Only change the number of queues if new_tx, or new_rx is non-0. + * + * Returns 0 on success. + */ +int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) +{ + struct ice_pf *pf = vsi->back; + int err = 0, timeout = 50; + + if (!new_rx && !new_tx) + return -EINVAL; + + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + if (new_tx) + vsi->req_txq = new_tx; + if (new_rx) + vsi->req_rxq = new_rx; + + /* set for the next time the netdev is started */ + if (!netif_running(vsi->netdev)) { + ice_vsi_rebuild(vsi, false); + dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n"); + goto done; + } + + ice_vsi_close(vsi); + ice_vsi_rebuild(vsi, false); + ice_pf_dcb_recfg(pf); + ice_vsi_open(vsi); +done: + clear_bit(__ICE_CFG_BUSY, pf->state); + return err; +} + +/** + * ice_log_pkg_init - log result of DDP package load + * @hw: pointer to hardware info + * @status: status of package load + */ +static void +ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) +{ + struct ice_pf *pf = (struct ice_pf *)hw->back; + struct device *dev = ice_pf_to_dev(pf); + + switch (*status) { + case ICE_SUCCESS: + /* The package download AdminQ command returned success because + * this download succeeded or ICE_ERR_AQ_NO_WORK since there is + * already a package loaded on the device. + */ + if (hw->pkg_ver.major == hw->active_pkg_ver.major && + hw->pkg_ver.minor == hw->active_pkg_ver.minor && + hw->pkg_ver.update == hw->active_pkg_ver.update && + hw->pkg_ver.draft == hw->active_pkg_ver.draft && + !memcmp(hw->pkg_name, hw->active_pkg_name, + sizeof(hw->pkg_name))) { + if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST) + dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n", + hw->active_pkg_name, + hw->active_pkg_ver.major, + hw->active_pkg_ver.minor, + hw->active_pkg_ver.update, + hw->active_pkg_ver.draft); + else + dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n", + hw->active_pkg_name, + hw->active_pkg_ver.major, + hw->active_pkg_ver.minor, + hw->active_pkg_ver.update, + hw->active_pkg_ver.draft); + } else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ || + hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) { + dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", + hw->active_pkg_name, + hw->active_pkg_ver.major, + hw->active_pkg_ver.minor, + ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); + *status = ICE_ERR_NOT_SUPPORTED; + } else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && + hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) { + dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n", + hw->active_pkg_name, + hw->active_pkg_ver.major, + hw->active_pkg_ver.minor, + hw->active_pkg_ver.update, + hw->active_pkg_ver.draft, + hw->pkg_name, + hw->pkg_ver.major, + hw->pkg_ver.minor, + hw->pkg_ver.update, + hw->pkg_ver.draft); + } else { + dev_err(dev, "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n"); + *status = ICE_ERR_NOT_SUPPORTED; + } + break; + case ICE_ERR_BUF_TOO_SHORT: + /* fall-through */ + case ICE_ERR_CFG: + dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n"); + break; + case ICE_ERR_NOT_SUPPORTED: + /* Package File version not supported */ + if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ || + (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && + hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR)) + dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n"); + else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ || + (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && + hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR)) + dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n", + ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); + break; + case ICE_ERR_AQ_ERROR: + switch (hw->pkg_dwnld_status) { + case ICE_AQ_RC_ENOSEC: + case ICE_AQ_RC_EBADSIG: + dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n"); + return; + case ICE_AQ_RC_ESVN: + dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n"); + return; + case ICE_AQ_RC_EBADMAN: + case ICE_AQ_RC_EBADBUF: + dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n"); + return; + default: + break; + } + /* fall-through */ + default: + dev_err(dev, "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n", + *status); + break; + } +} + +/** + * ice_load_pkg - load/reload the DDP Package file + * @firmware: firmware structure when firmware requested or NULL for reload + * @pf: pointer to the PF instance + * + * Called on probe and post CORER/GLOBR rebuild to load DDP Package and + * initialize HW tables. + */ +static void +ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) +{ + enum ice_status status = ICE_ERR_PARAM; + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + + /* Load DDP Package */ + if (firmware && !hw->pkg_copy) { + status = ice_copy_and_init_pkg(hw, firmware->data, + firmware->size); + ice_log_pkg_init(hw, &status); + } else if (!firmware && hw->pkg_copy) { + /* Reload package during rebuild after CORER/GLOBR reset */ + status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); + ice_log_pkg_init(hw, &status); + } else { + dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n"); + } + + if (status) { + /* Safe Mode */ + clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags); + return; + } + + /* Successful download package is the precondition for advanced + * features, hence setting the ICE_FLAG_ADV_FEATURES flag + */ + set_bit(ICE_FLAG_ADV_FEATURES, pf->flags); +} + +/** * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines * @pf: pointer to the PF structure * @@ -2266,12 +3036,109 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) static void ice_verify_cacheline_size(struct ice_pf *pf) { if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) - dev_warn(&pf->pdev->dev, - "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", + dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", ICE_CACHE_LINE_BYTES); } /** + * ice_send_version - update firmware with driver version + * @pf: PF struct + * + * Returns ICE_SUCCESS on success, else error code + */ +static enum ice_status ice_send_version(struct ice_pf *pf) +{ + struct ice_driver_ver dv; + + dv.major_ver = DRV_VERSION_MAJOR; + dv.minor_ver = DRV_VERSION_MINOR; + dv.build_ver = DRV_VERSION_BUILD; + dv.subbuild_ver = 0; + strscpy((char *)dv.driver_string, DRV_VERSION, + sizeof(dv.driver_string)); + return ice_aq_send_driver_ver(&pf->hw, &dv, NULL); +} + +/** + * ice_get_opt_fw_name - return optional firmware file name or NULL + * @pf: pointer to the PF instance + */ +static char *ice_get_opt_fw_name(struct ice_pf *pf) +{ + /* Optional firmware name same as default with additional dash + * followed by a EUI-64 identifier (PCIe Device Serial Number) + */ + struct pci_dev *pdev = pf->pdev; + char *opt_fw_filename = NULL; + u32 dword; + u8 dsn[8]; + int pos; + + /* Determine the name of the optional file using the DSN (two + * dwords following the start of the DSN Capability). + */ + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN); + if (pos) { + opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL); + if (!opt_fw_filename) + return NULL; + + pci_read_config_dword(pdev, pos + 4, &dword); + put_unaligned_le32(dword, &dsn[0]); + pci_read_config_dword(pdev, pos + 8, &dword); + put_unaligned_le32(dword, &dsn[4]); + snprintf(opt_fw_filename, NAME_MAX, + "%sice-%02x%02x%02x%02x%02x%02x%02x%02x.pkg", + ICE_DDP_PKG_PATH, + dsn[7], dsn[6], dsn[5], dsn[4], + dsn[3], dsn[2], dsn[1], dsn[0]); + } + + return opt_fw_filename; +} + +/** + * ice_request_fw - Device initialization routine + * @pf: pointer to the PF instance + */ +static void ice_request_fw(struct ice_pf *pf) +{ + char *opt_fw_filename = ice_get_opt_fw_name(pf); + const struct firmware *firmware = NULL; + struct device *dev = ice_pf_to_dev(pf); + int err = 0; + + /* optional device-specific DDP (if present) overrides the default DDP + * package file. kernel logs a debug message if the file doesn't exist, + * and warning messages for other errors. + */ + if (opt_fw_filename) { + err = firmware_request_nowarn(&firmware, opt_fw_filename, dev); + if (err) { + kfree(opt_fw_filename); + goto dflt_pkg_load; + } + + /* request for firmware was successful. Download to device */ + ice_load_pkg(firmware, pf); + kfree(opt_fw_filename); + release_firmware(firmware); + return; + } + +dflt_pkg_load: + err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev); + if (err) { + dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); + return; + } + + /* request for firmware was successful. Download to device */ + ice_load_pkg(firmware, pf); + release_firmware(firmware); +} + +/** * ice_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in ice_pci_tbl @@ -2286,7 +3153,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct ice_hw *hw; int err; - /* this driver uses devres, see Documentation/driver-api/driver-model/devres.rst */ + /* this driver uses devres, see + * Documentation/driver-api/driver-model/devres.rst + */ err = pcim_enable_device(pdev); if (err) return err; @@ -2321,6 +3190,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; + pci_save_state(pdev); + hw->back = pf; hw->vendor_id = pdev->vendor; hw->device_id = pdev->device; @@ -2345,23 +3216,28 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_exit_unroll; } - dev_info(dev, "firmware %d.%d.%05d api %d.%d\n", - hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build, - hw->api_maj_ver, hw->api_min_ver); + ice_request_fw(pf); - ice_init_pf(pf); + /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be + * set in pf->state, which will cause ice_is_safe_mode to return + * true + */ + if (ice_is_safe_mode(pf)) { + dev_err(dev, "Package download failed. Advanced features disabled - Device now in Safe Mode\n"); + /* we already got function/device capabilities but these don't + * reflect what the driver needs to do in safe mode. Instead of + * adding conditional logic everywhere to ignore these + * device/function capabilities, override them. + */ + ice_set_safe_mode_caps(hw); + } - err = ice_init_pf_dcb(pf, false); + err = ice_init_pf(pf); if (err) { - clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - clear_bit(ICE_FLAG_DCB_ENA, pf->flags); - - /* do not fail overall init if DCB init fails */ - err = 0; + dev_err(dev, "ice_init_pf failed: %d\n", err); + goto err_init_pf_unroll; } - ice_determine_q_usage(pf); - pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; if (!pf->num_alloc_vsi) { err = -EIO; @@ -2390,12 +3266,10 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) * the misc functionality and queue processing is combined in * the same vector and that gets setup at open. */ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - err = ice_req_irq_msix_misc(pf); - if (err) { - dev_err(dev, "setup of misc vector failed: %d\n", err); - goto err_init_interrupt_unroll; - } + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "setup of misc vector failed: %d\n", err); + goto err_init_interrupt_unroll; } /* create switch struct for the switch element created by FW on boot */ @@ -2417,12 +3291,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) err = ice_setup_pf_sw(pf); if (err) { - dev_err(dev, "probe failed due to setup PF switch:%d\n", err); + dev_err(dev, "probe failed due to setup PF switch: %d\n", err); goto err_alloc_sw_unroll; } clear_bit(__ICE_SERVICE_DIS, pf->state); + /* tell the firmware we are up */ + err = ice_send_version(pf); + if (err) { + dev_err(dev, "probe failed sending driver version %s. error: %d\n", + ice_drv_ver, err); + goto err_alloc_sw_unroll; + } + /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); @@ -2434,12 +3316,29 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_verify_cacheline_size(pf); + /* If no DDP driven features have to be setup, return here */ + if (ice_is_safe_mode(pf)) + return 0; + + /* initialize DDP driven features */ + + /* Note: DCB init failure is non-fatal to load */ + if (ice_init_pf_dcb(pf, false)) { + clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(ICE_FLAG_DCB_ENA, pf->flags); + } else { + ice_cfg_lldp_mib_change(&pf->hw, true); + } + + /* print PCI link speed and width */ + pcie_print_link_status(pf->pdev); + return 0; err_alloc_sw_unroll: set_bit(__ICE_SERVICE_DIS, pf->state); set_bit(__ICE_DOWN, pf->state); - devm_kfree(&pf->pdev->dev, pf->first_sw); + devm_kfree(dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: @@ -2483,9 +3382,15 @@ static void ice_remove(struct pci_dev *pdev) continue; ice_vsi_free_q_vectors(pf->vsi[i]); } - ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + /* Issue a PFR as part of the prescribed driver unload flow. Do not + * do it via ice_schedule_reset() since there is no need to rebuild + * and the service task is already stopped. + */ + ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pdev); + ice_clear_interrupt_scheme(pf); pci_disable_pcie_error_reporting(pdev); } @@ -2536,8 +3441,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) err = pci_enable_device_mem(pdev); if (err) { - dev_err(&pdev->dev, - "Cannot re-enable PCI device after reset, error %d\n", + dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n", err); result = PCI_ERS_RESULT_DISCONNECT; } else { @@ -2556,8 +3460,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) err = pci_cleanup_aer_uncorrect_error_status(pdev); if (err) - dev_dbg(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", + dev_dbg(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", err); /* non-fatal, continue */ @@ -2576,8 +3479,8 @@ static void ice_pci_err_resume(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); if (!pf) { - dev_err(&pdev->dev, - "%s failed, device is unrecoverable\n", __func__); + dev_err(&pdev->dev, "%s failed, device is unrecoverable\n", + __func__); return; } @@ -2631,6 +3534,15 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822X_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 }, /* required last entry */ { 0, } }; @@ -2711,10 +3623,8 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) struct ice_hw *hw = &pf->hw; struct sockaddr *addr = pi; enum ice_status status; - LIST_HEAD(a_mac_list); - LIST_HEAD(r_mac_list); u8 flags = 0; - int err; + int err = 0; u8 *mac; mac = (u8 *)addr->sa_data; @@ -2737,42 +3647,23 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) /* When we change the MAC address we also have to change the MAC address * based filter rules that were created previously for the old MAC * address. So first, we remove the old filter rule using ice_remove_mac - * and then create a new filter rule using ice_add_mac. Note that for - * both these operations, we first need to form a "list" of MAC - * addresses (even though in this case, we have only 1 MAC address to be - * added/removed) and this done using ice_add_mac_to_list. Depending on - * the ensuing operation this "list" of MAC addresses is either to be - * added or removed from the filter. + * and then create a new filter rule using ice_add_mac via + * ice_vsi_cfg_mac_fltr function call for both add and/or remove + * filters. */ - err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr); - if (err) { - err = -EADDRNOTAVAIL; - goto free_lists; - } - - status = ice_remove_mac(hw, &r_mac_list); + status = ice_vsi_cfg_mac_fltr(vsi, netdev->dev_addr, false); if (status) { err = -EADDRNOTAVAIL; - goto free_lists; - } - - err = ice_add_mac_to_list(vsi, &a_mac_list, mac); - if (err) { - err = -EADDRNOTAVAIL; - goto free_lists; + goto err_update_filters; } - status = ice_add_mac(hw, &a_mac_list); + status = ice_vsi_cfg_mac_fltr(vsi, mac, true); if (status) { err = -EADDRNOTAVAIL; - goto free_lists; + goto err_update_filters; } -free_lists: - /* free list entries */ - ice_free_fltr_list(&pf->pdev->dev, &r_mac_list); - ice_free_fltr_list(&pf->pdev->dev, &a_mac_list); - +err_update_filters: if (err) { netdev_err(netdev, "can't set MAC %pM. filter update failed\n", mac); @@ -2788,8 +3679,8 @@ free_lists: flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; status = ice_aq_manage_mac_write(hw, mac, flags, NULL); if (status) { - netdev_err(netdev, "can't set MAC %pM. write to firmware failed.\n", - mac); + netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n", + mac, status); } return 0; } @@ -2821,6 +3712,47 @@ static void ice_set_rx_mode(struct net_device *netdev) } /** + * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate + * @netdev: network interface device structure + * @queue_index: Queue ID + * @maxrate: maximum bandwidth in Mbps + */ +static int +ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + enum ice_status status; + u16 q_handle; + u8 tc; + + /* Validate maxrate requested is within permitted range */ + if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) { + netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n", + maxrate, queue_index); + return -EINVAL; + } + + q_handle = vsi->tx_rings[queue_index]->q_handle; + tc = ice_dcb_get_tc(vsi, queue_index); + + /* Set BW back to default, when user set maxrate to 0 */ + if (!maxrate) + status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, + q_handle, ICE_MAX_BW); + else + status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc, + q_handle, ICE_MAX_BW, maxrate * 1000); + if (status) { + netdev_err(netdev, "Unable to set Tx max rate, error %d\n", + status); + return -EIO; + } + + return 0; +} + +/** * ice_fdb_add - add an entry to the hardware database * @ndm: the input from the stack * @tb: pointer to array of nladdr (unused) @@ -2900,8 +3832,21 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; int ret = 0; + /* Don't set any netdev advanced features with device in Safe Mode */ + if (ice_is_safe_mode(vsi->back)) { + dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n"); + return ret; + } + + /* Do not change setting during reset */ + if (ice_is_reset_in_progress(pf->state)) { + dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); + return -EBUSY; + } + /* Multiple features can be changed in one call so keep features in * separate if/else statements to guarantee each feature is checked */ @@ -2972,6 +3917,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi) ice_vsi_cfg_dcb_rings(vsi); err = ice_vsi_cfg_lan_txqs(vsi); + if (!err && ice_is_xdp_ena_vsi(vsi)) + err = ice_vsi_cfg_xdp_txqs(vsi); if (!err) err = ice_vsi_cfg_rxqs(vsi); @@ -3008,10 +3955,7 @@ static int ice_up_complete(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; int err; - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - ice_vsi_cfg_msix(vsi); - else - return -ENOTSUPP; + ice_vsi_cfg_msix(vsi); /* Enable only Rx rings, Tx rings were enabled by the FW when the * Tx queue group list was configured and the context bits were @@ -3132,7 +4076,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) * ice_update_vsi_stats - Update VSI stats counters * @vsi: the VSI to be updated */ -static void ice_update_vsi_stats(struct ice_vsi *vsi) +void ice_update_vsi_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *cur_ns = &vsi->net_stats; struct ice_eth_stats *cur_es = &vsi->eth_stats; @@ -3159,6 +4103,8 @@ static void ice_update_vsi_stats(struct ice_vsi *vsi) cur_ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes; cur_ns->rx_length_errors = pf->stats.rx_len_errors; + /* record drops from the port level */ + cur_ns->rx_missed_errors = pf->stats.eth.rx_discards; } } @@ -3166,149 +4112,139 @@ static void ice_update_vsi_stats(struct ice_vsi *vsi) * ice_update_pf_stats - Update PF port stats counters * @pf: PF whose stats needs to be updated */ -static void ice_update_pf_stats(struct ice_pf *pf) +void ice_update_pf_stats(struct ice_pf *pf) { struct ice_hw_port_stats *prev_ps, *cur_ps; struct ice_hw *hw = &pf->hw; - u8 pf_id; + u8 port; + port = hw->port_info->lport; prev_ps = &pf->stats_prev; cur_ps = &pf->stats; - pf_id = hw->pf_id; - ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, + ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, + &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes); - ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.rx_unicast, + ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded, + &prev_ps->eth.rx_unicast, &cur_ps->eth.rx_unicast); - ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.rx_multicast, + ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded, + &prev_ps->eth.rx_multicast, &cur_ps->eth.rx_multicast); - ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast, + ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded, + &prev_ps->eth.rx_broadcast, &cur_ps->eth.rx_broadcast); - ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.tx_bytes, + ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded, + &prev_ps->eth.rx_discards, + &cur_ps->eth.rx_discards); + + ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded, + &prev_ps->eth.tx_bytes, &cur_ps->eth.tx_bytes); - ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.tx_unicast, + ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded, + &prev_ps->eth.tx_unicast, &cur_ps->eth.tx_unicast); - ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.tx_multicast, + ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded, + &prev_ps->eth.tx_multicast, &cur_ps->eth.tx_multicast); - ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id), - pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast, + ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded, + &prev_ps->eth.tx_broadcast, &cur_ps->eth.tx_broadcast); - ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded, &prev_ps->tx_dropped_link_down, &cur_ps->tx_dropped_link_down); - ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id), - pf->stat_prev_loaded, &prev_ps->rx_size_64, - &cur_ps->rx_size_64); + ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded, + &prev_ps->rx_size_64, &cur_ps->rx_size_64); - ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id), - pf->stat_prev_loaded, &prev_ps->rx_size_127, - &cur_ps->rx_size_127); + ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded, + &prev_ps->rx_size_127, &cur_ps->rx_size_127); - ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id), - pf->stat_prev_loaded, &prev_ps->rx_size_255, - &cur_ps->rx_size_255); + ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded, + &prev_ps->rx_size_255, &cur_ps->rx_size_255); - ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id), - pf->stat_prev_loaded, &prev_ps->rx_size_511, - &cur_ps->rx_size_511); + ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded, + &prev_ps->rx_size_511, &cur_ps->rx_size_511); - ice_stat_update40(hw, GLPRT_PRC1023H(pf_id), - GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded, &prev_ps->rx_size_1023, &cur_ps->rx_size_1023); - ice_stat_update40(hw, GLPRT_PRC1522H(pf_id), - GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded, &prev_ps->rx_size_1522, &cur_ps->rx_size_1522); - ice_stat_update40(hw, GLPRT_PRC9522H(pf_id), - GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded, &prev_ps->rx_size_big, &cur_ps->rx_size_big); - ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id), - pf->stat_prev_loaded, &prev_ps->tx_size_64, - &cur_ps->tx_size_64); + ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded, + &prev_ps->tx_size_64, &cur_ps->tx_size_64); - ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id), - pf->stat_prev_loaded, &prev_ps->tx_size_127, - &cur_ps->tx_size_127); + ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded, + &prev_ps->tx_size_127, &cur_ps->tx_size_127); - ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id), - pf->stat_prev_loaded, &prev_ps->tx_size_255, - &cur_ps->tx_size_255); + ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded, + &prev_ps->tx_size_255, &cur_ps->tx_size_255); - ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id), - pf->stat_prev_loaded, &prev_ps->tx_size_511, - &cur_ps->tx_size_511); + ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded, + &prev_ps->tx_size_511, &cur_ps->tx_size_511); - ice_stat_update40(hw, GLPRT_PTC1023H(pf_id), - GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded, &prev_ps->tx_size_1023, &cur_ps->tx_size_1023); - ice_stat_update40(hw, GLPRT_PTC1522H(pf_id), - GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded, &prev_ps->tx_size_1522, &cur_ps->tx_size_1522); - ice_stat_update40(hw, GLPRT_PTC9522H(pf_id), - GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded, + ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded, &prev_ps->tx_size_big, &cur_ps->tx_size_big); - ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded, &prev_ps->link_xon_rx, &cur_ps->link_xon_rx); - ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded, &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx); - ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded, &prev_ps->link_xon_tx, &cur_ps->link_xon_tx); - ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded, &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx); ice_update_dcb_stats(pf); - ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded, &prev_ps->crc_errors, &cur_ps->crc_errors); - ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded, &prev_ps->illegal_bytes, &cur_ps->illegal_bytes); - ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded, &prev_ps->mac_local_faults, &cur_ps->mac_local_faults); - ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded, &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); - ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); - ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); - ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded, &prev_ps->rx_fragments, &cur_ps->rx_fragments); - ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded, &prev_ps->rx_oversize, &cur_ps->rx_oversize); - ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded, + ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded, &prev_ps->rx_jabber, &cur_ps->rx_jabber); pf->stat_prev_loaded = true; @@ -3328,12 +4264,16 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) vsi_stats = &vsi->net_stats; - if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq) + if (!vsi->num_txq || !vsi->num_rxq) return; + /* netdev packet/byte stats come from ring counter. These are obtained * by summing up ring counters (done by ice_update_vsi_ring_stats). + * But, only call the update routine and read the registers if VSI is + * not down. */ - ice_update_vsi_ring_stats(vsi); + if (!test_bit(__ICE_DOWN, vsi->state)) + ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; stats->rx_packets = vsi_stats->rx_packets; @@ -3372,85 +4312,6 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) } /** - * ice_force_phys_link_state - Force the physical link state - * @vsi: VSI to force the physical link state to up/down - * @link_up: true/false indicates to set the physical link to up/down - * - * Force the physical link state by getting the current PHY capabilities from - * hardware and setting the PHY config based on the determined capabilities. If - * link changes a link event will be triggered because both the Enable Automatic - * Link Update and LESM Enable bits are set when setting the PHY capabilities. - * - * Returns 0 on success, negative on failure - */ -static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) -{ - struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_aqc_set_phy_cfg_data *cfg; - struct ice_port_info *pi; - struct device *dev; - int retcode; - - if (!vsi || !vsi->port_info || !vsi->back) - return -EINVAL; - if (vsi->type != ICE_VSI_PF) - return 0; - - dev = &vsi->back->pdev->dev; - - pi = vsi->port_info; - - pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL); - if (!pcaps) - return -ENOMEM; - - retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, - NULL); - if (retcode) { - dev_err(dev, - "Failed to get phy capabilities, VSI %d error %d\n", - vsi->vsi_num, retcode); - retcode = -EIO; - goto out; - } - - /* No change in link */ - if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) && - link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) - goto out; - - cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL); - if (!cfg) { - retcode = -ENOMEM; - goto out; - } - - cfg->phy_type_low = pcaps->phy_type_low; - cfg->phy_type_high = pcaps->phy_type_high; - cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; - cfg->low_power_ctrl = pcaps->low_power_ctrl; - cfg->eee_cap = pcaps->eee_cap; - cfg->eeer_value = pcaps->eeer_value; - cfg->link_fec_opt = pcaps->link_fec_options; - if (link_up) - cfg->caps |= ICE_AQ_PHY_ENA_LINK; - else - cfg->caps &= ~ICE_AQ_PHY_ENA_LINK; - - retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL); - if (retcode) { - dev_err(dev, "Failed to set phy config, VSI %d error %d\n", - vsi->vsi_num, retcode); - retcode = -EIO; - } - - devm_kfree(dev, cfg); -out: - devm_kfree(dev, pcaps); - return retcode; -} - -/** * ice_down - Shutdown the connection * @vsi: The VSI being stopped */ @@ -3470,14 +4331,18 @@ int ice_down(struct ice_vsi *vsi) tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); if (tx_err) - netdev_err(vsi->netdev, - "Failed stop Tx rings, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); + if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { + tx_err = ice_vsi_stop_xdp_tx_rings(vsi); + if (tx_err) + netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", + vsi->vsi_num, tx_err); + } rx_err = ice_vsi_stop_rx_rings(vsi); if (rx_err) - netdev_err(vsi->netdev, - "Failed stop Rx rings, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n", vsi->vsi_num, rx_err); ice_napi_disable_all(vsi); @@ -3485,8 +4350,7 @@ int ice_down(struct ice_vsi *vsi) if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { link_err = ice_force_phys_link_state(vsi, false); if (link_err) - netdev_err(vsi->netdev, - "Failed to set physical link down, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", vsi->vsi_num, link_err); } @@ -3497,8 +4361,7 @@ int ice_down(struct ice_vsi *vsi) ice_clean_rx_ring(vsi->rx_rings[i]); if (tx_err || rx_err || link_err) { - netdev_err(vsi->netdev, - "Failed to close VSI 0x%04X on switch 0x%04X\n", + netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); return -EIO; } @@ -3517,14 +4380,19 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_txq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n", vsi->vsi_num); return -EINVAL; } ice_for_each_txq(vsi, i) { - vsi->tx_rings[i]->netdev = vsi->netdev; - err = ice_setup_tx_ring(vsi->tx_rings[i]); + struct ice_ring *ring = vsi->tx_rings[i]; + + if (!ring) + return -EINVAL; + + ring->netdev = vsi->netdev; + err = ice_setup_tx_ring(ring); if (err) break; } @@ -3543,14 +4411,19 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_rxq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n", vsi->vsi_num); return -EINVAL; } ice_for_each_rxq(vsi, i) { - vsi->rx_rings[i]->netdev = vsi->netdev; - err = ice_setup_rx_ring(vsi->rx_rings[i]); + struct ice_ring *ring = vsi->rx_rings[i]; + + if (!ring) + return -EINVAL; + + ring->netdev = vsi->netdev; + err = ice_setup_rx_ring(ring); if (err) break; } @@ -3559,24 +4432,6 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) } /** - * ice_vsi_req_irq - Request IRQ from the OS - * @vsi: The VSI IRQ is being requested for - * @basename: name for the vector - * - * Return 0 on success and a negative value on error - */ -static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename) -{ - struct ice_pf *pf = vsi->back; - int err = -EINVAL; - - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - err = ice_vsi_req_irq_msix(vsi, basename); - - return err; -} - -/** * ice_vsi_open - Called when a network interface is made active * @vsi: the VSI to open * @@ -3604,8 +4459,8 @@ static int ice_vsi_open(struct ice_vsi *vsi) goto err_setup_rx; snprintf(int_name, sizeof(int_name) - 1, "%s-%s", - dev_driver_string(&pf->pdev->dev), vsi->netdev->name); - err = ice_vsi_req_irq(vsi, int_name); + dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name); + err = ice_vsi_req_irq_msix(vsi, int_name); if (err) goto err_setup_rx; @@ -3653,150 +4508,108 @@ static void ice_vsi_release_all(struct ice_pf *pf) err = ice_vsi_release(pf->vsi[i]); if (err) - dev_dbg(&pf->pdev->dev, - "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", + dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", i, err, pf->vsi[i]->vsi_num); } } /** - * ice_ena_vsi - resume a VSI - * @vsi: the VSI being resume - * @locked: is the rtnl_lock already held + * ice_vsi_rebuild_by_type - Rebuild VSI of a given type + * @pf: pointer to the PF instance + * @type: VSI type to rebuild + * + * Iterates through the pf->vsi array and rebuilds VSIs of the requested type */ -static int ice_ena_vsi(struct ice_vsi *vsi, bool locked) +static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) { - int err = 0; - - if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) - return err; + struct device *dev = ice_pf_to_dev(pf); + enum ice_status status; + int i, err; - clear_bit(__ICE_NEEDS_RESTART, vsi->state); + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; - if (vsi->netdev && vsi->type == ICE_VSI_PF) { - struct net_device *netd = vsi->netdev; + if (!vsi || vsi->type != type) + continue; - if (netif_running(vsi->netdev)) { - if (locked) { - err = netd->netdev_ops->ndo_open(netd); - } else { - rtnl_lock(); - err = netd->netdev_ops->ndo_open(netd); - rtnl_unlock(); - } - } else { - err = ice_vsi_open(vsi); + /* rebuild the VSI */ + err = ice_vsi_rebuild(vsi, true); + if (err) { + dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n", + err, vsi->idx, ice_vsi_type_str(type)); + return err; } - } - - return err; -} -/** - * ice_pf_ena_all_vsi - Resume all VSIs on a PF - * @pf: the PF - * @locked: is the rtnl_lock already held - */ -#ifdef CONFIG_DCB -int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) -#else -static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) -#endif /* CONFIG_DCB */ -{ - int v; - - ice_for_each_vsi(pf, v) - if (pf->vsi[v]) - if (ice_ena_vsi(pf->vsi[v], locked)) - return -EIO; - - return 0; -} - -/** - * ice_vsi_rebuild_all - rebuild all VSIs in PF - * @pf: the PF - */ -static int ice_vsi_rebuild_all(struct ice_pf *pf) -{ - int i; - - /* loop through pf->vsi array and reinit the VSI if found */ - ice_for_each_vsi(pf, i) { - int err; + /* replay filters for the VSI */ + status = ice_replay_vsi(&pf->hw, vsi->idx); + if (status) { + dev_err(dev, "replay VSI failed, status %d, VSI index %d, type %s\n", + status, vsi->idx, ice_vsi_type_str(type)); + return -EIO; + } - if (!pf->vsi[i]) - continue; + /* Re-map HW VSI number, using VSI handle that has been + * previously validated in ice_replay_vsi() call above + */ + vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); - err = ice_vsi_rebuild(pf->vsi[i]); + /* enable the VSI */ + err = ice_ena_vsi(vsi, false); if (err) { - dev_err(&pf->pdev->dev, - "VSI at index %d rebuild failed\n", - pf->vsi[i]->idx); + dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n", + err, vsi->idx, ice_vsi_type_str(type)); return err; } - dev_info(&pf->pdev->dev, - "VSI at index %d rebuilt. vsi_num = 0x%x\n", - pf->vsi[i]->idx, pf->vsi[i]->vsi_num); + dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx, + ice_vsi_type_str(type)); } return 0; } /** - * ice_vsi_replay_all - replay all VSIs configuration in the PF - * @pf: the PF + * ice_update_pf_netdev_link - Update PF netdev link status + * @pf: pointer to the PF instance */ -static int ice_vsi_replay_all(struct ice_pf *pf) +static void ice_update_pf_netdev_link(struct ice_pf *pf) { - struct ice_hw *hw = &pf->hw; - enum ice_status ret; + bool link_up; int i; - /* loop through pf->vsi array and replay the VSI if found */ ice_for_each_vsi(pf, i) { - if (!pf->vsi[i]) - continue; - - ret = ice_replay_vsi(hw, pf->vsi[i]->idx); - if (ret) { - dev_err(&pf->pdev->dev, - "VSI at index %d replay failed %d\n", - pf->vsi[i]->idx, ret); - return -EIO; - } + struct ice_vsi *vsi = pf->vsi[i]; - /* Re-map HW VSI number, using VSI handle that has been - * previously validated in ice_replay_vsi() call above - */ - pf->vsi[i]->vsi_num = ice_get_hw_vsi_num(hw, pf->vsi[i]->idx); + if (!vsi || vsi->type != ICE_VSI_PF) + return; - dev_info(&pf->pdev->dev, - "VSI at index %d filter replayed successfully - vsi_num %i\n", - pf->vsi[i]->idx, pf->vsi[i]->vsi_num); + ice_get_link_status(pf->vsi[i]->port_info, &link_up); + if (link_up) { + netif_carrier_on(pf->vsi[i]->netdev); + netif_tx_wake_all_queues(pf->vsi[i]->netdev); + } else { + netif_carrier_off(pf->vsi[i]->netdev); + netif_tx_stop_all_queues(pf->vsi[i]->netdev); + } } - - /* Clean up replay filter after successful re-configuration */ - ice_replay_post(hw); - return 0; } /** * ice_rebuild - rebuild after reset * @pf: PF to rebuild + * @reset_type: type of reset */ -static void ice_rebuild(struct ice_pf *pf) +static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; enum ice_status ret; - int err, i; + int err; if (test_bit(__ICE_DOWN, pf->state)) goto clear_recovery; - dev_dbg(dev, "rebuilding PF\n"); + dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); ret = ice_init_all_ctrlq(hw); if (ret) { @@ -3804,12 +4617,28 @@ static void ice_rebuild(struct ice_pf *pf) goto err_init_ctrlq; } + /* if DDP was previously loaded successfully */ + if (!ice_is_safe_mode(pf)) { + /* reload the SW DB of filter tables */ + if (reset_type == ICE_RESET_PFR) + ice_fill_blk_tbls(hw); + else + /* Reload DDP Package after CORER/GLOBR reset */ + ice_load_pkg(NULL, pf); + } + ret = ice_clear_pf_cfg(hw); if (ret) { dev_err(dev, "clear PF configuration failed %d\n", ret); goto err_init_ctrlq; } + if (pf->first_sw->dflt_vsi_ena) + dev_info(dev, "Clearing default VSI, re-enable after reset completes\n"); + /* clear the default VSI configuration if it exists */ + pf->first_sw->dflt_vsi = NULL; + pf->first_sw->dflt_vsi_ena = false; + ice_clear_pxe_mode(hw); ret = ice_get_caps(hw); @@ -3822,65 +4651,52 @@ static void ice_rebuild(struct ice_pf *pf) if (err) goto err_sched_init_port; - ice_dcb_rebuild(pf); + err = ice_update_link_info(hw->port_info); + if (err) + dev_err(dev, "Get link status error %d\n", err); - err = ice_vsi_rebuild_all(pf); + /* start misc vector */ + err = ice_req_irq_msix_misc(pf); if (err) { - dev_err(dev, "ice_vsi_rebuild_all failed\n"); - goto err_vsi_rebuild; + dev_err(dev, "misc vector setup failed: %d\n", err); + goto err_sched_init_port; } - err = ice_update_link_info(hw->port_info); - if (err) - dev_err(&pf->pdev->dev, "Get link status error %d\n", err); + if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) + ice_dcb_rebuild(pf); - /* Replay all VSIs Configuration, including filters after reset */ - if (ice_vsi_replay_all(pf)) { - dev_err(&pf->pdev->dev, - "error replaying VSI configurations with switch filter rules\n"); + /* rebuild PF VSI */ + err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); + if (err) { + dev_err(dev, "PF VSI rebuild failed: %d\n", err); goto err_vsi_rebuild; } - /* start misc vector */ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - err = ice_req_irq_msix_misc(pf); + if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { + err = ice_vsi_rebuild_by_type(pf, ICE_VSI_VF); if (err) { - dev_err(dev, "misc vector setup failed: %d\n", err); + dev_err(dev, "VF VSI rebuild failed: %d\n", err); goto err_vsi_rebuild; } } - /* restart the VSIs that were rebuilt and running before the reset */ - err = ice_pf_ena_all_vsi(pf, false); - if (err) { - dev_err(&pf->pdev->dev, "error enabling VSIs\n"); - /* no need to disable VSIs in tear down path in ice_rebuild() - * since its already taken care in ice_vsi_open() - */ + ice_update_pf_netdev_link(pf); + + /* tell the firmware we are up */ + ret = ice_send_version(pf); + if (ret) { + dev_err(dev, "Rebuild failed due to error sending driver version: %d\n", + ret); goto err_vsi_rebuild; } - ice_for_each_vsi(pf, i) { - bool link_up; - - if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF) - continue; - ice_get_link_status(pf->vsi[i]->port_info, &link_up); - if (link_up) { - netif_carrier_on(pf->vsi[i]->netdev); - netif_tx_wake_all_queues(pf->vsi[i]->netdev); - } else { - netif_carrier_off(pf->vsi[i]->netdev); - netif_tx_stop_all_queues(pf->vsi[i]->netdev); - } - } + ice_replay_post(hw); /* if we get here, reset flow is successful */ clear_bit(__ICE_RESET_FAILED, pf->state); return; err_vsi_rebuild: - ice_vsi_release_all(pf); err_sched_init_port: ice_sched_cleanup_all(hw); err_init_ctrlq: @@ -3893,6 +4709,18 @@ clear_recovery: } /** + * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @vsi: Pointer to VSI structure + */ +static int ice_max_xdp_frame_size(struct ice_vsi *vsi) +{ + if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM; + else + return ICE_RXBUF_3072; +} + +/** * ice_change_mtu - NDO callback to change the MTU * @netdev: network interface device structure * @new_mtu: new value for maximum frame size @@ -3911,6 +4739,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return 0; } + if (ice_is_xdp_ena_vsi(vsi)) { + int frame_size = ice_max_xdp_frame_size(vsi); + + if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) { + netdev_err(netdev, "max MTU for XDP usage is %d\n", + frame_size - ICE_ETH_PKT_HDR_PAD); + return -EINVAL; + } + } + if (new_mtu < netdev->min_mtu) { netdev_err(netdev, "new MTU invalid. min_mtu is %d\n", netdev->min_mtu); @@ -3955,7 +4793,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } } - netdev_info(netdev, "changed MTU to %d\n", new_mtu); + netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); return 0; } @@ -3973,7 +4811,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; + struct device *dev; + dev = ice_pf_to_dev(pf); if (seed) { struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; @@ -3981,8 +4821,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(&pf->pdev->dev, - "Cannot set RSS key, err %d aq_err %d\n", + dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -3992,8 +4831,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(&pf->pdev->dev, - "Cannot set RSS lut, err %d aq_err %d\n", + dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4016,15 +4854,16 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; + struct device *dev; + dev = ice_pf_to_dev(pf); if (seed) { struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; status = ice_aq_get_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(&pf->pdev->dev, - "Cannot get RSS key, err %d aq_err %d\n", + dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4034,8 +4873,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(&pf->pdev->dev, - "Cannot get RSS lut, err %d aq_err %d\n", + dev_err(dev, "Cannot get RSS lut, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4079,7 +4917,6 @@ ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, */ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) { - struct device *dev = &vsi->back->pdev->dev; struct ice_aqc_vsi_props *vsi_props; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; @@ -4088,7 +4925,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) vsi_props = &vsi->info; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -4104,7 +4941,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", bmode, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -4113,7 +4950,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) vsi_props->sw_flags = ctxt->info.sw_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -4191,42 +5028,23 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * ice_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void ice_tx_timeout(struct net_device *netdev) +static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_ring *tx_ring = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int hung_queue = -1; u32 i; pf->tx_timeout_count++; - /* find the stopped queue the same way dev_watchdog() does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - unsigned long trans_start; - struct netdev_queue *q; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - trans_start + netdev->watchdog_timeo)) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - else - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_txq; i++) - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) - if (hung_queue == vsi->tx_rings[i]->q_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_txq; i++) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + if (txqueue == vsi->tx_rings[i]->q_index) { + tx_ring = vsi->tx_rings[i]; + break; + } /* Reset recovery level if enough time has elapsed after last timeout. * Also ensure no new reset action happens before next timeout period. @@ -4241,21 +5059,19 @@ static void ice_tx_timeout(struct net_device *netdev) struct ice_hw *hw = &pf->hw; u32 head, val = 0; - head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[hung_queue])) & + head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; /* Read interrupt register */ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - val = rd32(hw, - GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); + val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", - vsi->vsi_num, hung_queue, tx_ring->next_to_clean, + vsi->vsi_num, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: @@ -4295,6 +5111,7 @@ int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_port_info *pi; int err; if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) { @@ -4304,13 +5121,32 @@ int ice_open(struct net_device *netdev) netif_carrier_off(netdev); - err = ice_force_phys_link_state(vsi, true); + pi = vsi->port_info; + err = ice_update_link_info(pi); if (err) { - netdev_err(netdev, - "Failed to set physical link up, error %d\n", err); + netdev_err(netdev, "Failed to get link info, error %d\n", + err); return err; } + /* Set PHY if there is media, otherwise, turn off PHY */ + if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + err = ice_force_phys_link_state(vsi, true); + if (err) { + netdev_err(netdev, "Failed to set physical link up, error %d\n", + err); + return err; + } + } else { + err = ice_aq_set_link_restart_an(pi, false, NULL); + if (err) { + netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n", + vsi->vsi_num, err); + return err; + } + set_bit(ICE_FLAG_NO_MEDIA, vsi->back->flags); + } + err = ice_vsi_open(vsi); if (err) netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", @@ -4388,6 +5224,17 @@ out_rm_features: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } +static const struct net_device_ops ice_netdev_safe_mode_ops = { + .ndo_open = ice_open, + .ndo_stop = ice_stop, + .ndo_start_xmit = ice_start_xmit, + .ndo_set_mac_address = ice_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ice_change_mtu, + .ndo_get_stats64 = ice_get_stats64, + .ndo_tx_timeout = ice_tx_timeout, +}; + static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, @@ -4398,12 +5245,14 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, + .ndo_set_tx_maxrate = ice_set_tx_maxrate, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, .ndo_set_vf_trust = ice_set_vf_trust, .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, + .ndo_get_vf_stats = ice_get_vf_stats, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, @@ -4412,4 +5261,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_fdb_add = ice_fdb_add, .ndo_fdb_del = ice_fdb_del, .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp, + .ndo_xdp_xmit = ice_xdp_xmit, + .ndo_xsk_wakeup = ice_xsk_wakeup, }; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index bcb431f1bd92..7525ac50742e 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -219,8 +219,7 @@ static void ice_release_nvm(struct ice_hw *hw) * * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq. */ -static enum ice_status -ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) +enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) { enum ice_status status; @@ -242,9 +241,10 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) */ enum ice_status ice_init_nvm(struct ice_hw *hw) { + u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len; struct ice_nvm_info *nvm = &hw->nvm; u16 eetrack_lo, eetrack_hi; - enum ice_status status = 0; + enum ice_status status; u32 fla, gens_stat; u8 sr_size; @@ -261,15 +261,15 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) fla = rd32(hw, GLNVM_FLA); if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ nvm->blank_nvm_mode = false; - } else { /* Blank programming mode */ + } else { + /* Blank programming mode */ nvm->blank_nvm_mode = true; - status = ICE_ERR_NVM_BLANK_MODE; ice_debug(hw, ICE_DBG_NVM, "NVM init error: unsupported blank mode.\n"); - return status; + return ICE_ERR_NVM_BLANK_MODE; } - status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver); + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read DEV starter version.\n"); @@ -287,9 +287,54 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } - hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo; + nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + + /* the following devices do not have boot_cfg_tlv yet */ + if (hw->device_id == ICE_DEV_ID_E822C_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822C_QSFP || + hw->device_id == ICE_DEV_ID_E822C_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822C_SGMII || + hw->device_id == ICE_DEV_ID_E822C_SFP || + hw->device_id == ICE_DEV_ID_E822X_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822L_SFP || + hw->device_id == ICE_DEV_ID_E822L_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822L_SGMII) + return status; - return status; + status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, + ICE_SR_BOOT_CFG_PTR); + if (status) { + ice_debug(hw, ICE_DBG_INIT, + "Failed to read Boot Configuration Block TLV.\n"); + return status; + } + + /* Boot Configuration Block must have length at least 2 words + * (Combo Image Version High and Combo Image Version Low) + */ + if (boot_cfg_tlv_len < 2) { + ice_debug(hw, ICE_DBG_INIT, + "Invalid Boot Configuration Block TLV size.\n"); + return ICE_ERR_INVAL_SIZE; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF), + &oem_hi); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n"); + return status; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1), + &oem_lo); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n"); + return status; + } + + nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo; + + return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h new file mode 100644 index 000000000000..a9fa011c22c6 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_NVM_H_ +#define _ICE_NVM_H_ + +enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data); +#endif /* _ICE_NVM_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h new file mode 100644 index 000000000000..71647566964e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_PROTOCOL_TYPE_H_ +#define _ICE_PROTOCOL_TYPE_H_ +/* Decoders for ice_prot_id: + * - F: First + * - I: Inner + * - L: Last + * - O: Outer + * - S: Single + */ +enum ice_prot_id { + ICE_PROT_ID_INVAL = 0, + ICE_PROT_IPV4_OF_OR_S = 32, + ICE_PROT_IPV4_IL = 33, + ICE_PROT_IPV6_OF_OR_S = 40, + ICE_PROT_IPV6_IL = 41, + ICE_PROT_TCP_IL = 49, + ICE_PROT_UDP_IL_OR_S = 53, + ICE_PROT_SCTP_IL = 96, + ICE_PROT_META_ID = 255, /* when offset == metadata */ + ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ +}; +#endif /* _ICE_PROTOCOL_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2a232504379d..eae707ddf8e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -260,33 +260,17 @@ ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent, /** * ice_sched_get_first_node - get the first node of the given layer - * @hw: pointer to the HW struct + * @pi: port information structure * @parent: pointer the base node of the subtree * @layer: layer number * * This function retrieves the first node of the given layer from the subtree */ static struct ice_sched_node * -ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent, - u8 layer) +ice_sched_get_first_node(struct ice_port_info *pi, + struct ice_sched_node *parent, u8 layer) { - u8 i; - - if (layer < hw->sw_entry_point_layer) - return NULL; - for (i = 0; i < parent->num_children; i++) { - struct ice_sched_node *node = parent->children[i]; - - if (node) { - if (node->tx_sched_layer == layer) - return node; - /* this recursion is intentional, and wouldn't - * go more than 9 calls - */ - return ice_sched_get_first_node(hw, node, layer); - } - } - return NULL; + return pi->sib_head[parent->tc_num][layer]; } /** @@ -300,7 +284,7 @@ struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc) { u8 i; - if (!pi) + if (!pi || !pi->root) return NULL; for (i = 0; i < pi->root->num_children; i++) if (pi->root->children[i]->tc_num == tc) @@ -342,7 +326,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) parent = node->parent; /* root has no parent */ if (parent) { - struct ice_sched_node *p, *tc_node; + struct ice_sched_node *p; /* update the parent */ for (i = 0; i < parent->num_children; i++) @@ -354,16 +338,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) break; } - /* search for previous sibling that points to this node and - * remove the reference - */ - tc_node = ice_sched_get_tc_node(pi, node->tc_num); - if (!tc_node) { - ice_debug(hw, ICE_DBG_SCHED, - "Invalid TC number %d\n", node->tc_num); - goto err_exit; - } - p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer); + p = ice_sched_get_first_node(pi, node, node->tx_sched_layer); while (p) { if (p->sibling == node) { p->sibling = node->sibling; @@ -371,8 +346,13 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) } p = p->sibling; } + + /* update the sibling head if head is getting removed */ + if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node) + pi->sib_head[node->tc_num][node->tx_sched_layer] = + node->sibling; } -err_exit: + /* leaf nodes have no children */ if (node->children) devm_kfree(ice_hw_to_dev(hw), node->children); @@ -431,6 +411,27 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req, } /** + * ice_aq_cfg_sched_elems - configures scheduler elements + * @hw: pointer to the HW struct + * @elems_req: number of elements to configure + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_cfgd: returns total number of elements configured + * @cd: pointer to command details structure or NULL + * + * Configure scheduling elements (0x0403) + */ +static enum ice_status +ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_conf_elem *buf, u16 buf_size, + u16 *elems_cfgd, struct ice_sq_cd *cd) +{ + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems, + elems_req, (void *)buf, buf_size, + elems_cfgd, cd); +} + +/** * ice_aq_suspend_sched_elems - suspend scheduler elements * @hw: pointer to the HW struct * @elems_req: number of elements to suspend @@ -577,6 +578,149 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs) } /** + * ice_aq_rl_profile - performs a rate limiting task + * @hw: pointer to the HW struct + * @opcode:opcode for add, query, or remove profile(s) + * @num_profiles: the number of profiles + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_processed: number of processed add or remove profile(s) to return + * @cd: pointer to command details structure + * + * RL profile function to add, query, or remove profile(s) + */ +static enum ice_status +ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode, + u16 num_profiles, struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd) +{ + struct ice_aqc_rl_profile *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.rl_profile; + + ice_fill_dflt_direct_cmd_desc(&desc, opcode); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_profiles = cpu_to_le16(num_profiles); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && num_processed) + *num_processed = le16_to_cpu(cmd->num_processed); + return status; +} + +/** + * ice_aq_add_rl_profile - adds rate limiting profile(s) + * @hw: pointer to the HW struct + * @num_profiles: the number of profile(s) to be add + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_profiles_added: total number of profiles added to return + * @cd: pointer to command details structure + * + * Add RL profile (0x0410) + */ +static enum ice_status +ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles, + struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_profiles_added, + struct ice_sq_cd *cd) +{ + return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles, + num_profiles, buf, + buf_size, num_profiles_added, cd); +} + +/** + * ice_aq_remove_rl_profile - removes RL profile(s) + * @hw: pointer to the HW struct + * @num_profiles: the number of profile(s) to remove + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_profiles_removed: total number of profiles removed to return + * @cd: pointer to command details structure or NULL + * + * Remove RL profile (0x0415) + */ +static enum ice_status +ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles, + struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_profiles_removed, + struct ice_sq_cd *cd) +{ + return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles, + num_profiles, buf, + buf_size, num_profiles_removed, cd); +} + +/** + * ice_sched_del_rl_profile - remove RL profile + * @hw: pointer to the HW struct + * @rl_info: rate limit profile information + * + * If the profile ID is not referenced anymore, it removes profile ID with + * its associated parameters from HW DB,and locally. The caller needs to + * hold scheduler lock. + */ +static enum ice_status +ice_sched_del_rl_profile(struct ice_hw *hw, + struct ice_aqc_rl_profile_info *rl_info) +{ + struct ice_aqc_rl_profile_generic_elem *buf; + u16 num_profiles_removed; + enum ice_status status; + u16 num_profiles = 1; + + if (rl_info->prof_id_ref != 0) + return ICE_ERR_IN_USE; + + /* Safe to remove profile ID */ + buf = (struct ice_aqc_rl_profile_generic_elem *) + &rl_info->profile; + status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf), + &num_profiles_removed, NULL); + if (status || num_profiles_removed != num_profiles) + return ICE_ERR_CFG; + + /* Delete stale entry now */ + list_del(&rl_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), rl_info); + return status; +} + +/** + * ice_sched_clear_rl_prof - clears RL prof entries + * @pi: port information structure + * + * This function removes all RL profile from HW as well as from SW DB. + */ +static void ice_sched_clear_rl_prof(struct ice_port_info *pi) +{ + u16 ln; + + for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) { + struct ice_aqc_rl_profile_info *rl_prof_elem; + struct ice_aqc_rl_profile_info *rl_prof_tmp; + + list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp, + &pi->rl_prof_list[ln], list_entry) { + struct ice_hw *hw = pi->hw; + enum ice_status status; + + rl_prof_elem->prof_id_ref = 0; + status = ice_sched_del_rl_profile(hw, rl_prof_elem); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "Remove rl profile failed\n"); + /* On error, free mem required */ + list_del(&rl_prof_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), rl_prof_elem); + } + } + } +} + +/** * ice_sched_clear_agg - clears the aggregator related information * @hw: pointer to the hardware structure * @@ -612,6 +756,8 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi) { if (!pi) return; + /* remove RL profiles related lists */ + ice_sched_clear_rl_prof(pi); if (pi->root) { ice_free_sched_node(pi, pi->root); pi->root = NULL; @@ -652,8 +798,7 @@ void ice_sched_cleanup_all(struct ice_hw *hw) hw->layer_info = NULL; } - if (hw->port_info) - ice_sched_clear_port(hw->port_info); + ice_sched_clear_port(hw->port_info); hw->num_tx_sched_layers = 0; hw->num_tx_sched_phys_layers = 0; @@ -743,13 +888,17 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, /* add it to previous node sibling pointer */ /* Note: siblings are not linked across branches */ - prev = ice_sched_get_first_node(hw, tc_node, layer); + prev = ice_sched_get_first_node(pi, tc_node, layer); if (prev && prev != new_node) { while (prev->sibling) prev = prev->sibling; prev->sibling = new_node; } + /* initialize the sibling head */ + if (!pi->sib_head[tc_node->tc_num][layer]) + pi->sib_head[tc_node->tc_num][layer] = new_node; + if (i == 0) *first_node_teid = teid; } @@ -1030,6 +1179,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi) /* initialize the port for handling the scheduler tree */ pi->port_state = ICE_SCHED_PORT_STATE_READY; mutex_init(&pi->sched_lock); + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + INIT_LIST_HEAD(&pi->rl_prof_list[i]); err_init_port: if (status && pi->root) { @@ -1052,7 +1203,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) struct ice_aqc_query_txsched_res_resp *buf; enum ice_status status = 0; __le16 max_sibl; - u8 i; + u16 i; if (hw->layer_info) return status; @@ -1078,8 +1229,8 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) * and so on. This array will be populated from root (index 0) to * qgroup layer 7. Leaf node has no children. */ - for (i = 0; i < hw->num_tx_sched_layers; i++) { - max_sibl = buf->layer_props[i].max_sibl_grp_sz; + for (i = 0; i < hw->num_tx_sched_layers - 1; i++) { + max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz; hw->max_children[i] = le16_to_cpu(max_sibl); } @@ -1160,7 +1311,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, goto lan_q_exit; /* get the first queue group node from VSI sub-tree */ - qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer); + qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); while (qgrp_node) { /* make sure the qgroup node is part of the VSI subtree */ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) @@ -1191,7 +1342,7 @@ ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node, u8 vsi_layer; vsi_layer = ice_sched_get_vsi_layer(hw); - node = ice_sched_get_first_node(hw, tc_node, vsi_layer); + node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer); /* Check whether it already exists */ while (node) { @@ -1316,7 +1467,8 @@ ice_sched_calc_vsi_support_nodes(struct ice_hw *hw, /* If intermediate nodes are reached max children * then add a new one. */ - node = ice_sched_get_first_node(hw, tc_node, (u8)i); + node = ice_sched_get_first_node(hw->port_info, tc_node, + (u8)i); /* scan all the siblings */ while (node) { if (node->num_children < hw->max_children[i]) @@ -1685,3 +1837,1095 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle) { return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN); } + +/** + * ice_sched_rm_unused_rl_prof - remove unused RL profile + * @pi: port information structure + * + * This function removes unused rate limit profiles from the HW and + * SW DB. The caller needs to hold scheduler lock. + */ +static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi) +{ + u16 ln; + + for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) { + struct ice_aqc_rl_profile_info *rl_prof_elem; + struct ice_aqc_rl_profile_info *rl_prof_tmp; + + list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp, + &pi->rl_prof_list[ln], list_entry) { + if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem)) + ice_debug(pi->hw, ICE_DBG_SCHED, + "Removed rl profile\n"); + } + } +} + +/** + * ice_sched_update_elem - update element + * @hw: pointer to the HW struct + * @node: pointer to node + * @info: node info to update + * + * It updates the HW DB, and local SW DB of node. It updates the scheduling + * parameters of node from argument info data buffer (Info->data buf) and + * returns success or error on config sched element failure. The caller + * needs to hold scheduler lock. + */ +static enum ice_status +ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_aqc_conf_elem buf; + enum ice_status status; + u16 elem_cfgd = 0; + u16 num_elems = 1; + + buf.generic[0] = *info; + /* Parent TEID is reserved field in this aq call */ + buf.generic[0].parent_teid = 0; + /* Element type is reserved field in this aq call */ + buf.generic[0].data.elem_type = 0; + /* Flags is reserved field in this aq call */ + buf.generic[0].data.flags = 0; + + /* Update HW DB */ + /* Configure element node */ + status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf), + &elem_cfgd, NULL); + if (status || elem_cfgd != num_elems) { + ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n"); + return ICE_ERR_CFG; + } + + /* Config success case */ + /* Now update local SW DB */ + /* Only copy the data portion of info buffer */ + node->info.data = info->data; + return status; +} + +/** + * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params + * @hw: pointer to the HW struct + * @node: sched node to configure + * @rl_type: rate limit type CIR, EIR, or shared + * @bw_alloc: BW weight/allocation + * + * This function configures node element's BW allocation. + */ +static enum ice_status +ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, + enum ice_rl_type rl_type, u8 bw_alloc) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + enum ice_status status; + + buf = node->info; + data = &buf.data; + if (rl_type == ICE_MIN_BW) { + data->valid_sections |= ICE_AQC_ELEM_VALID_CIR; + data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc); + } else if (rl_type == ICE_MAX_BW) { + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc); + } else { + return ICE_ERR_PARAM; + } + + /* Configure element */ + status = ice_sched_update_elem(hw, node, &buf); + return status; +} + +/** + * ice_set_clear_cir_bw - set or clear CIR BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear CIR bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap); + bw_t_info->cir_bw.bw = 0; + } else { + /* Save type of BW information */ + set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap); + bw_t_info->cir_bw.bw = bw; + } +} + +/** + * ice_set_clear_eir_bw - set or clear EIR BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear EIR bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = 0; + } else { + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element. + * First clear earlier saved shared BW information. + */ + clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = 0; + /* save EIR BW information */ + set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = bw; + } +} + +/** + * ice_set_clear_shared_bw - set or clear shared BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear shared bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = 0; + } else { + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element. + * First clear earlier saved EIR BW information. + */ + clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = 0; + /* save shared BW information */ + set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = bw; + } +} + +/** + * ice_sched_calc_wakeup - calculate RL profile wakeup parameter + * @bw: bandwidth in Kbps + * + * This function calculates the wakeup parameter of RL profile. + */ +static u16 ice_sched_calc_wakeup(s32 bw) +{ + s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f; + s32 wakeup_f_int; + u16 wakeup = 0; + + /* Get the wakeup integer value */ + bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE); + wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec); + if (wakeup_int > 63) { + wakeup = (u16)((1 << 15) | wakeup_int); + } else { + /* Calculate fraction value up to 4 decimals + * Convert Integer value to a constant multiplier + */ + wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int; + wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER * + ICE_RL_PROF_FREQUENCY, + bytes_per_sec); + + /* Get Fraction value */ + wakeup_f = wakeup_a - wakeup_b; + + /* Round up the Fractional value via Ceil(Fractional value) */ + if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2)) + wakeup_f += 1; + + wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION, + ICE_RL_PROF_MULTIPLIER); + wakeup |= (u16)(wakeup_int << 9); + wakeup |= (u16)(0x1ff & wakeup_f_int); + } + + return wakeup; +} + +/** + * ice_sched_bw_to_rl_profile - convert BW to profile parameters + * @bw: bandwidth in Kbps + * @profile: profile parameters to return + * + * This function converts the BW to profile structure format. + */ +static enum ice_status +ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) +{ + enum ice_status status = ICE_ERR_PARAM; + s64 bytes_per_sec, ts_rate, mv_tmp; + bool found = false; + s32 encode = 0; + s64 mv = 0; + s32 i; + + /* Bw settings range is from 0.5Mb/sec to 100Gb/sec */ + if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW) + return status; + + /* Bytes per second from Kbps */ + bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE); + + /* encode is 6 bits but really useful are 5 bits */ + for (i = 0; i < 64; i++) { + u64 pow_result = BIT_ULL(i); + + ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY, + pow_result * ICE_RL_PROF_TS_MULTIPLIER); + if (ts_rate <= 0) + continue; + + /* Multiplier value */ + mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER, + ts_rate); + + /* Round to the nearest ICE_RL_PROF_MULTIPLIER */ + mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER); + + /* First multiplier value greater than the given + * accuracy bytes + */ + if (mv > ICE_RL_PROF_ACCURACY_BYTES) { + encode = i; + found = true; + break; + } + } + if (found) { + u16 wm; + + wm = ice_sched_calc_wakeup(bw); + profile->rl_multiply = cpu_to_le16(mv); + profile->wake_up_calc = cpu_to_le16(wm); + profile->rl_encode = cpu_to_le16(encode); + status = 0; + } else { + status = ICE_ERR_DOES_NOT_EXIST; + } + + return status; +} + +/** + * ice_sched_add_rl_profile - add RL profile + * @pi: port information structure + * @rl_type: type of rate limit BW - min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * @layer_num: specifies in which layer to create profile + * + * This function first checks the existing list for corresponding BW + * parameter. If it exists, it returns the associated profile otherwise + * it creates a new rate limit profile for requested BW, and adds it to + * the HW DB and local list. It returns the new profile or null on error. + * The caller needs to hold the scheduler lock. + */ +static struct ice_aqc_rl_profile_info * +ice_sched_add_rl_profile(struct ice_port_info *pi, + enum ice_rl_type rl_type, u32 bw, u8 layer_num) +{ + struct ice_aqc_rl_profile_generic_elem *buf; + struct ice_aqc_rl_profile_info *rl_prof_elem; + u16 profiles_added = 0, num_profiles = 1; + enum ice_status status; + struct ice_hw *hw; + u8 profile_type; + + if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + return NULL; + switch (rl_type) { + case ICE_MIN_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR; + break; + case ICE_MAX_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR; + break; + case ICE_SHARED_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL; + break; + default: + return NULL; + } + + if (!pi) + return NULL; + hw = pi->hw; + list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], + list_entry) + if (rl_prof_elem->profile.flags == profile_type && + rl_prof_elem->bw == bw) + /* Return existing profile ID info */ + return rl_prof_elem; + + /* Create new profile ID */ + rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem), + GFP_KERNEL); + + if (!rl_prof_elem) + return NULL; + + status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile); + if (status) + goto exit_add_rl_prof; + + rl_prof_elem->bw = bw; + /* layer_num is zero relative, and fw expects level from 1 to 9 */ + rl_prof_elem->profile.level = layer_num + 1; + rl_prof_elem->profile.flags = profile_type; + rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size); + + /* Create new entry in HW DB */ + buf = (struct ice_aqc_rl_profile_generic_elem *) + &rl_prof_elem->profile; + status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf), + &profiles_added, NULL); + if (status || profiles_added != num_profiles) + goto exit_add_rl_prof; + + /* Good entry - add in the list */ + rl_prof_elem->prof_id_ref = 0; + list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]); + return rl_prof_elem; + +exit_add_rl_prof: + devm_kfree(ice_hw_to_dev(hw), rl_prof_elem); + return NULL; +} + +/** + * ice_sched_cfg_node_bw_lmt - configure node sched params + * @hw: pointer to the HW struct + * @node: sched node to configure + * @rl_type: rate limit type CIR, EIR, or shared + * @rl_prof_id: rate limit profile ID + * + * This function configures node element's BW limit. + */ +static enum ice_status +ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node, + enum ice_rl_type rl_type, u16 rl_prof_id) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + switch (rl_type) { + case ICE_MIN_BW: + data->valid_sections |= ICE_AQC_ELEM_VALID_CIR; + data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id); + break; + case ICE_MAX_BW: + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED) + return ICE_ERR_CFG; + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id); + break; + case ICE_SHARED_BW: + /* Check for removing shared BW */ + if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) { + /* remove shared profile */ + data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED; + data->srl_id = 0; /* clear SRL field */ + + /* enable back EIR to default profile */ + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_profile_idx = + cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID); + break; + } + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) && + (le16_to_cpu(data->eir_bw.bw_profile_idx) != + ICE_SCHED_DFLT_RL_PROF_ID)) + return ICE_ERR_CFG; + /* EIR BW is set to default, disable it */ + data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR; + /* Okay to enable shared BW now */ + data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED; + data->srl_id = cpu_to_le16(rl_prof_id); + break; + default: + /* Unknown rate limit type */ + return ICE_ERR_PARAM; + } + + /* Configure element */ + return ice_sched_update_elem(hw, node, &buf); +} + +/** + * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID + * @node: sched node + * @rl_type: rate limit type + * + * If existing profile matches, it returns the corresponding rate + * limit profile ID, otherwise it returns an invalid ID as error. + */ +static u16 +ice_sched_get_node_rl_prof_id(struct ice_sched_node *node, + enum ice_rl_type rl_type) +{ + u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID; + struct ice_aqc_txsched_elem *data; + + data = &node->info.data; + switch (rl_type) { + case ICE_MIN_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR) + rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx); + break; + case ICE_MAX_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR) + rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx); + break; + case ICE_SHARED_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED) + rl_prof_id = le16_to_cpu(data->srl_id); + break; + default: + break; + } + + return rl_prof_id; +} + +/** + * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer + * @pi: port information structure + * @rl_type: type of rate limit BW - min, max, or shared + * @layer_index: layer index + * + * This function returns requested profile creation layer. + */ +static u8 +ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type, + u8 layer_index) +{ + struct ice_hw *hw = pi->hw; + + if (layer_index >= hw->num_tx_sched_layers) + return ICE_SCHED_INVAL_LAYER_NUM; + switch (rl_type) { + case ICE_MIN_BW: + if (hw->layer_info[layer_index].max_cir_rl_profiles) + return layer_index; + break; + case ICE_MAX_BW: + if (hw->layer_info[layer_index].max_eir_rl_profiles) + return layer_index; + break; + case ICE_SHARED_BW: + /* if current layer doesn't support SRL profile creation + * then try a layer up or down. + */ + if (hw->layer_info[layer_index].max_srl_profiles) + return layer_index; + else if (layer_index < hw->num_tx_sched_layers - 1 && + hw->layer_info[layer_index + 1].max_srl_profiles) + return layer_index + 1; + else if (layer_index > 0 && + hw->layer_info[layer_index - 1].max_srl_profiles) + return layer_index - 1; + break; + default: + break; + } + return ICE_SCHED_INVAL_LAYER_NUM; +} + +/** + * ice_sched_get_srl_node - get shared rate limit node + * @node: tree node + * @srl_layer: shared rate limit layer + * + * This function returns SRL node to be used for shared rate limit purpose. + * The caller needs to hold scheduler lock. + */ +static struct ice_sched_node * +ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer) +{ + if (srl_layer > node->tx_sched_layer) + return node->children[0]; + else if (srl_layer < node->tx_sched_layer) + /* Node can't be created without a parent. It will always + * have a valid parent except root node. + */ + return node->parent; + else + return node; +} + +/** + * ice_sched_rm_rl_profile - remove RL profile ID + * @pi: port information structure + * @layer_num: layer number where profiles are saved + * @profile_type: profile type like EIR, CIR, or SRL + * @profile_id: profile ID to remove + * + * This function removes rate limit profile from layer 'layer_num' of type + * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold + * scheduler lock. + */ +static enum ice_status +ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type, + u16 profile_id) +{ + struct ice_aqc_rl_profile_info *rl_prof_elem; + enum ice_status status = 0; + + if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + return ICE_ERR_PARAM; + /* Check the existing list for RL profile */ + list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], + list_entry) + if (rl_prof_elem->profile.flags == profile_type && + le16_to_cpu(rl_prof_elem->profile.profile_id) == + profile_id) { + if (rl_prof_elem->prof_id_ref) + rl_prof_elem->prof_id_ref--; + + /* Remove old profile ID from database */ + status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem); + if (status && status != ICE_ERR_IN_USE) + ice_debug(pi->hw, ICE_DBG_SCHED, + "Remove rl profile failed\n"); + break; + } + if (status == ICE_ERR_IN_USE) + status = 0; + return status; +} + +/** + * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default + * @pi: port information structure + * @node: pointer to node structure + * @rl_type: rate limit type min, max, or shared + * @layer_num: layer number where RL profiles are saved + * + * This function configures node element's BW rate limit profile ID of + * type CIR, EIR, or SRL to default. This function needs to be called + * with the scheduler lock held. + */ +static enum ice_status +ice_sched_set_node_bw_dflt(struct ice_port_info *pi, + struct ice_sched_node *node, + enum ice_rl_type rl_type, u8 layer_num) +{ + enum ice_status status; + struct ice_hw *hw; + u8 profile_type; + u16 rl_prof_id; + u16 old_id; + + hw = pi->hw; + switch (rl_type) { + case ICE_MIN_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR; + rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID; + break; + case ICE_MAX_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR; + rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID; + break; + case ICE_SHARED_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL; + /* No SRL is configured for default case */ + rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID; + break; + default: + return ICE_ERR_PARAM; + } + /* Save existing RL prof ID for later clean up */ + old_id = ice_sched_get_node_rl_prof_id(node, rl_type); + /* Configure BW scheduling parameters */ + status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id); + if (status) + return status; + + /* Remove stale RL profile ID */ + if (old_id == ICE_SCHED_DFLT_RL_PROF_ID || + old_id == ICE_SCHED_INVAL_PROF_ID) + return 0; + + return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id); +} + +/** + * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness + * @pi: port information structure + * @node: pointer to node structure + * @layer_num: layer number where rate limit profiles are saved + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth value + * + * This function prepares node element's bandwidth to SRL or EIR exclusively. + * EIR BW and Shared BW profiles are mutually exclusive and hence only one of + * them may be set for any given element. This function needs to be called + * with the scheduler lock held. + */ +static enum ice_status +ice_sched_set_eir_srl_excl(struct ice_port_info *pi, + struct ice_sched_node *node, + u8 layer_num, enum ice_rl_type rl_type, u32 bw) +{ + if (rl_type == ICE_SHARED_BW) { + /* SRL node passed in this case, it may be different node */ + if (bw == ICE_SCHED_DFLT_BW) + /* SRL being removed, ice_sched_cfg_node_bw_lmt() + * enables EIR to default. EIR is not set in this + * case, so no additional action is required. + */ + return 0; + + /* SRL being configured, set EIR to default here. + * ice_sched_cfg_node_bw_lmt() disables EIR when it + * configures SRL + */ + return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW, + layer_num); + } else if (rl_type == ICE_MAX_BW && + node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) { + /* Remove Shared profile. Set default shared BW call + * removes shared profile for a node. + */ + return ice_sched_set_node_bw_dflt(pi, node, + ICE_SHARED_BW, + layer_num); + } + return 0; +} + +/** + * ice_sched_set_node_bw - set node's bandwidth + * @pi: port information structure + * @node: tree node + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * @layer_num: layer number + * + * This function adds new profile corresponding to requested BW, configures + * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile + * ID from local database. The caller needs to hold scheduler lock. + */ +static enum ice_status +ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw, u8 layer_num) +{ + struct ice_aqc_rl_profile_info *rl_prof_info; + enum ice_status status = ICE_ERR_PARAM; + struct ice_hw *hw = pi->hw; + u16 old_id, rl_prof_id; + + rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num); + if (!rl_prof_info) + return status; + + rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id); + + /* Save existing RL prof ID for later clean up */ + old_id = ice_sched_get_node_rl_prof_id(node, rl_type); + /* Configure BW scheduling parameters */ + status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id); + if (status) + return status; + + /* New changes has been applied */ + /* Increment the profile ID reference count */ + rl_prof_info->prof_id_ref++; + + /* Check for old ID removal */ + if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) || + old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id) + return 0; + + return ice_sched_rm_rl_profile(pi, layer_num, + rl_prof_info->profile.flags, + old_id); +} + +/** + * ice_sched_set_node_bw_lmt - set node's BW limit + * @pi: port information structure + * @node: tree node + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * It updates node's BW limit parameters like BW RL profile ID of type CIR, + * EIR, or SRL. The caller needs to hold scheduler lock. + */ +static enum ice_status +ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw) +{ + struct ice_sched_node *cfg_node = node; + enum ice_status status; + + struct ice_hw *hw; + u8 layer_num; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + /* Remove unused RL profile IDs from HW and SW DB */ + ice_sched_rm_unused_rl_prof(pi); + layer_num = ice_sched_get_rl_prof_layer(pi, rl_type, + node->tx_sched_layer); + if (layer_num >= hw->num_tx_sched_layers) + return ICE_ERR_PARAM; + + if (rl_type == ICE_SHARED_BW) { + /* SRL node may be different */ + cfg_node = ice_sched_get_srl_node(node, layer_num); + if (!cfg_node) + return ICE_ERR_CFG; + } + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type, + bw); + if (status) + return status; + if (bw == ICE_SCHED_DFLT_BW) + return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type, + layer_num); + return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num); +} + +/** + * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default + * @pi: port information structure + * @node: pointer to node structure + * @rl_type: rate limit type min, max, or shared + * + * This function configures node element's BW rate limit profile ID of + * type CIR, EIR, or SRL to default. This function needs to be called + * with the scheduler lock held. + */ +static enum ice_status +ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi, + struct ice_sched_node *node, + enum ice_rl_type rl_type) +{ + return ice_sched_set_node_bw_lmt(pi, node, rl_type, + ICE_SCHED_DFLT_BW); +} + +/** + * ice_sched_validate_srl_node - Check node for SRL applicability + * @node: sched node to configure + * @sel_layer: selected SRL layer + * + * This function checks if the SRL can be applied to a selected layer node on + * behalf of the requested node (first argument). This function needs to be + * called with scheduler lock held. + */ +static enum ice_status +ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer) +{ + /* SRL profiles are not available on all layers. Check if the + * SRL profile can be applied to a node above or below the + * requested node. SRL configuration is possible only if the + * selected layer's node has single child. + */ + if (sel_layer == node->tx_sched_layer || + ((sel_layer == node->tx_sched_layer + 1) && + node->num_children == 1) || + ((sel_layer == node->tx_sched_layer - 1) && + (node->parent && node->parent->num_children == 1))) + return 0; + + return ICE_ERR_CFG; +} + +/** + * ice_sched_save_q_bw - save queue node's BW information + * @q_ctx: queue context structure + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save BW information of queue type node for post replay use. + */ +static enum ice_status +ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw) +{ + switch (rl_type) { + case ICE_MIN_BW: + ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw); + break; + case ICE_MAX_BW: + ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw); + break; + case ICE_SHARED_BW: + ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw); + break; + default: + return ICE_ERR_PARAM; + } + return 0; +} + +/** + * ice_sched_set_q_bw_lmt - sets queue BW limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * @bw: bandwidth in Kbps + * + * This function sets BW limit of queue scheduling node. + */ +static enum ice_status +ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + struct ice_q_ctx *q_ctx; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return ICE_ERR_PARAM; + mutex_lock(&pi->sched_lock); + q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle); + if (!q_ctx) + goto exit_q_bw_lmt; + node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n"); + goto exit_q_bw_lmt; + } + + /* Return error if it is not a leaf node */ + if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) + goto exit_q_bw_lmt; + + /* SRL bandwidth layer selection */ + if (rl_type == ICE_SHARED_BW) { + u8 sel_layer; /* selected layer */ + + sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type, + node->tx_sched_layer); + if (sel_layer >= pi->hw->num_tx_sched_layers) { + status = ICE_ERR_PARAM; + goto exit_q_bw_lmt; + } + status = ice_sched_validate_srl_node(node, sel_layer); + if (status) + goto exit_q_bw_lmt; + } + + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + + if (!status) + status = ice_sched_save_q_bw(q_ctx, rl_type, bw); + +exit_q_bw_lmt: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_q_bw_lmt - configure queue BW limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * @bw: bandwidth in Kbps + * + * This function configures BW limit of queue scheduling node. + */ +enum ice_status +ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw) +{ + return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type, + bw); +} + +/** + * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * + * This function configures BW default limit of queue scheduling node. + */ +enum ice_status +ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type) +{ + return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type, + ICE_SCHED_DFLT_BW); +} + +/** + * ice_cfg_rl_burst_size - Set burst size value + * @hw: pointer to the HW struct + * @bytes: burst size in bytes + * + * This function configures/set the burst size to requested new value. The new + * burst size value is used for future rate limit calls. It doesn't change the + * existing or previously created RL profiles. + */ +enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes) +{ + u16 burst_size_to_prog; + + if (bytes < ICE_MIN_BURST_SIZE_ALLOWED || + bytes > ICE_MAX_BURST_SIZE_ALLOWED) + return ICE_ERR_PARAM; + if (ice_round_to_num(bytes, 64) <= + ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) { + /* 64 byte granularity case */ + /* Disable MSB granularity bit */ + burst_size_to_prog = ICE_64_BYTE_GRANULARITY; + /* round number to nearest 64 byte granularity */ + bytes = ice_round_to_num(bytes, 64); + /* The value is in 64 byte chunks */ + burst_size_to_prog |= (u16)(bytes / 64); + } else { + /* k bytes granularity case */ + /* Enable MSB granularity bit */ + burst_size_to_prog = ICE_KBYTE_GRANULARITY; + /* round number to nearest 1024 granularity */ + bytes = ice_round_to_num(bytes, 1024); + /* check rounding doesn't go beyond allowed */ + if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY) + bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY; + /* The value is in k bytes */ + burst_size_to_prog |= (u16)(bytes / 1024); + } + hw->max_burst_size = burst_size_to_prog; + return 0; +} + +/** + * ice_sched_replay_node_prio - re-configure node priority + * @hw: pointer to the HW struct + * @node: sched node to configure + * @priority: priority value + * + * This function configures node element's priority value. It + * needs to be called with scheduler lock held. + */ +static enum ice_status +ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node, + u8 priority) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + enum ice_status status; + + buf = node->info; + data = &buf.data; + data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC; + data->generic = priority; + + /* Configure element */ + status = ice_sched_update_elem(hw, node, &buf); + return status; +} + +/** + * ice_sched_replay_node_bw - replay node(s) BW + * @hw: pointer to the HW struct + * @node: sched node to configure + * @bw_t_info: BW type information + * + * This function restores node's BW from bw_t_info. The caller needs + * to hold the scheduler lock. + */ +static enum ice_status +ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node, + struct ice_bw_type_info *bw_t_info) +{ + struct ice_port_info *pi = hw->port_info; + enum ice_status status = ICE_ERR_PARAM; + u16 bw_alloc; + + if (!node) + return status; + if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT)) + return 0; + if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) { + status = ice_sched_replay_node_prio(hw, node, + bw_t_info->generic); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) { + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, + bw_t_info->cir_bw.bw); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) { + bw_alloc = bw_t_info->cir_bw.bw_alloc; + status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW, + bw_alloc); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) { + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, + bw_t_info->eir_bw.bw); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) { + bw_alloc = bw_t_info->eir_bw.bw_alloc; + status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW, + bw_alloc); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap)) + status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW, + bw_t_info->shared_bw); + return status; +} + +/** + * ice_sched_replay_q_bw - replay queue type node BW + * @pi: port information structure + * @q_ctx: queue context structure + * + * This function replays queue type node bandwidth. This function needs to be + * called with scheduler lock held. + */ +enum ice_status +ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx) +{ + struct ice_sched_node *q_node; + + /* Following also checks the presence of node in tree */ + q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid); + if (!q_node) + return ICE_ERR_PARAM; + return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info); +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 3902a8ad3025..f0593cfb6521 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -8,6 +8,36 @@ #define ICE_QGRP_LAYER_OFFSET 2 #define ICE_VSI_LAYER_OFFSET 4 +#define ICE_SCHED_INVAL_LAYER_NUM 0xFF +/* Burst size is a 12 bits register that is configured while creating the RL + * profile(s). MSB is a granularity bit and tells the granularity type + * 0 - LSB bits are in 64 bytes granularity + * 1 - LSB bits are in 1K bytes granularity + */ +#define ICE_64_BYTE_GRANULARITY 0 +#define ICE_KBYTE_GRANULARITY BIT(11) +#define ICE_MIN_BURST_SIZE_ALLOWED 64 /* In Bytes */ +#define ICE_MAX_BURST_SIZE_ALLOWED \ + ((BIT(11) - 1) * 1024) /* In Bytes */ +#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \ + ((BIT(11) - 1) * 64) /* In Bytes */ +#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED + +#define ICE_RL_PROF_FREQUENCY 446000000 +#define ICE_RL_PROF_ACCURACY_BYTES 128 +#define ICE_RL_PROF_MULTIPLIER 10000 +#define ICE_RL_PROF_TS_MULTIPLIER 32 +#define ICE_RL_PROF_FRACTION 512 + +/* BW rate limit profile parameters list entry along + * with bandwidth maintained per layer in port info + */ +struct ice_aqc_rl_profile_info { + struct ice_aqc_rl_profile_elem profile; + struct list_head list_entry; + u32 bw; /* requested */ + u16 prof_id_ref; /* profile ID to node association ref count */ +}; struct ice_sched_agg_vsi_info { struct list_head list_entry; @@ -48,4 +78,13 @@ enum ice_status ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, u8 owner, bool enable); enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle); +enum ice_status +ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type); +enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); +enum ice_status +ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index c01597885629..a9a8bc3aca42 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -26,6 +26,7 @@ enum ice_status { ICE_ERR_IN_USE = -16, ICE_ERR_MAX_LIMIT = -17, ICE_ERR_RESET_ONGOING = -18, + ICE_ERR_HW_TABLE = -19, ICE_ERR_NVM_CHECKSUM = -51, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 8271fd651725..431266081a80 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -50,42 +50,6 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi))) /** - * ice_aq_alloc_free_res - command to allocate/free resources - * @hw: pointer to the HW struct - * @num_entries: number of resource entries in buffer - * @buf: Indirect buffer to hold data parameters and response - * @buf_size: size of buffer for indirect commands - * @opc: pass in the command opcode - * @cd: pointer to command details structure or NULL - * - * Helper function to allocate/free resources using the admin queue commands - */ -static enum ice_status -ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, - struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, - enum ice_adminq_opc opc, struct ice_sq_cd *cd) -{ - struct ice_aqc_alloc_free_res_cmd *cmd; - struct ice_aq_desc desc; - - cmd = &desc.params.sw_res_ctrl; - - if (!buf) - return ICE_ERR_PARAM; - - if (buf_size < (num_entries * sizeof(buf->elem[0]))) - return ICE_ERR_PARAM; - - ice_fill_dflt_direct_cmd_desc(&desc, opc); - - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); - - cmd->num_entries = cpu_to_le16(num_entries); - - return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); -} - -/** * ice_init_def_sw_recp - initialize the recipe book keeping tables * @hw: pointer to the HW struct * @@ -416,8 +380,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx); } else { /* update with new HW VSI num */ - if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num) - tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; + tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; } return 0; @@ -1623,12 +1586,13 @@ ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id, status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, ice_aqc_opc_remove_sw_rules, NULL); - if (status) - goto exit; /* Remove a book keeping from the list */ devm_kfree(ice_hw_to_dev(hw), s_rule); + if (status) + goto exit; + list_del(&list_elem->list_entry); devm_kfree(ice_hw_to_dev(hw), list_elem); } @@ -2137,6 +2101,38 @@ out: } /** + * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry + * @hw: pointer to the hardware structure + * @recp_id: lookup type for which the specified rule needs to be searched + * @f_info: rule information + * + * Helper function to search for a unicast rule entry - this is to be used + * to remove unicast MAC filter that is not shared with other VSIs on the + * PF switch. + * + * Returns pointer to entry storing the rule if found + */ +static struct ice_fltr_mgmt_list_entry * +ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id, + struct ice_fltr_info *f_info) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *list_itr; + struct list_head *list_head; + + list_head = &sw->recp_list[recp_id].filt_rules; + list_for_each_entry(list_itr, list_head, list_entry) { + if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data, + sizeof(f_info->l_data)) && + f_info->fwd_id.hw_vsi_id == + list_itr->fltr_info.fwd_id.hw_vsi_id && + f_info->flag == list_itr->fltr_info.flag) + return list_itr; + } + return NULL; +} + +/** * ice_remove_mac - remove a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information @@ -2153,15 +2149,39 @@ enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) { struct ice_fltr_list_entry *list_itr, *tmp; + struct mutex *rule_lock; /* Lock to protect filter rule list */ if (!m_list) return ICE_ERR_PARAM; + rule_lock = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) { enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type; + u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0]; + u16 vsi_handle; if (l_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; + + vsi_handle = list_itr->fltr_info.vsi_handle; + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + list_itr->fltr_info.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, vsi_handle); + if (is_unicast_ether_addr(add) && !hw->ucast_shared) { + /* Don't remove the unicast address that belongs to + * another VSI on the switch, since it is not being + * shared... + */ + mutex_lock(rule_lock); + if (!ice_find_ucast_rule_entry(hw, ICE_SW_LKUP_MAC, + &list_itr->fltr_info)) { + mutex_unlock(rule_lock); + return ICE_ERR_DOES_NOT_EXIST; + } + mutex_unlock(rule_lock); + } list_itr->status = ice_remove_rule_internal(hw, ICE_SW_LKUP_MAC, list_itr); @@ -2372,7 +2392,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; - if (vid) + if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) recipe_id = ICE_SW_LKUP_PROMISC_VLAN; else recipe_id = ICE_SW_LKUP_PROMISC; @@ -2384,13 +2404,18 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, mutex_lock(rule_lock); list_for_each_entry(itr, rule_head, list_entry) { + struct ice_fltr_info *fltr_info; u8 fltr_promisc_mask = 0; if (!ice_vsi_uses_fltr(itr, vsi_handle)) continue; + fltr_info = &itr->fltr_info; + + if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN && + vid != fltr_info->l_data.mac_vlan.vlan_id) + continue; - fltr_promisc_mask |= - ice_determine_promisc_mask(&itr->fltr_info); + fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info); /* Skip if filter is not completely specified by given mask */ if (fltr_promisc_mask & ~promisc_mask) @@ -2398,7 +2423,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, &remove_list_head, - &itr->fltr_info); + fltr_info); if (status) { mutex_unlock(rule_lock); goto free_fltr_list; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index cb123fbe30be..fa14b9545dab 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -14,11 +14,6 @@ #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF -/* VSI queue context structure */ -struct ice_q_ctx { - u16 q_handle; -}; - /* VSI context structure for add/get/update/free operations */ struct ice_vsi_ctx { u16 vsi_num; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3c83230434b6..4de61dbedd36 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -5,8 +5,13 @@ #include <linux/prefetch.h> #include <linux/mm.h> +#include <linux/bpf_trace.h> +#include <net/xdp.h> +#include "ice_txrx_lib.h" +#include "ice_lib.h" #include "ice.h" #include "ice_dcb_lib.h" +#include "ice_xsk.h" #define ICE_RX_HDR_SIZE 256 @@ -19,7 +24,10 @@ static void ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) { if (tx_buf->skb) { - dev_kfree_skb_any(tx_buf->skb); + if (ice_ring_is_xdp(ring)) + page_frag_free(tx_buf->raw_buf); + else + dev_kfree_skb_any(tx_buf->skb); if (dma_unmap_len(tx_buf, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buf, dma), @@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) { u16 i; + if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) { + ice_xsk_clean_xdp_ring(tx_ring); + goto tx_skip_free; + } + /* ring already cleared, nothing to do */ if (!tx_ring->tx_buf) return; @@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) for (i = 0; i < tx_ring->count; i++) ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); +tx_skip_free: memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count); /* Zero out the descriptor ring */ @@ -95,17 +109,16 @@ void ice_free_tx_ring(struct ice_ring *tx_ring) /** * ice_clean_tx_irq - Reclaim resources after transmit completes - * @vsi: the VSI we care about * @tx_ring: Tx ring to clean * @napi_budget: Used to determine if we are in netpoll * * Returns true if there's any budget left (e.g. the clean is finished) */ -static bool -ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) +static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) { unsigned int total_bytes = 0, total_pkts = 0; - unsigned int budget = vsi->work_lmt; + unsigned int budget = ICE_DFLT_IRQ_WORK; + struct ice_vsi *vsi = tx_ring->vsi; s16 i = tx_ring->next_to_clean; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; @@ -114,6 +127,8 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) tx_desc = ICE_TX_DESC(tx_ring, i); i -= tx_ring->count; + prefetch(&vsi->state); + do { struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; @@ -135,8 +150,11 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) total_bytes += tx_buf->bytecount; total_pkts += tx_buf->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + if (ice_ring_is_xdp(tx_ring)) + page_frag_free(tx_buf->raw_buf); + else + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -187,12 +205,11 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.pkts += total_pkts; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_pkts += total_pkts; + + ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes); + + if (ice_ring_is_xdp(tx_ring)) + return !!budget; netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes); @@ -206,7 +223,7 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->q_index) && - !test_bit(__ICE_DOWN, vsi->state)) { + !test_bit(__ICE_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->q_index); ++tx_ring->tx_stats.restart_q; @@ -272,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) if (!rx_ring->rx_buf) return; + if (rx_ring->xsk_umem) { + ice_xsk_clean_rx_ring(rx_ring); + goto rx_skip_free; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; @@ -288,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) */ dma_sync_single_range_for_cpu(dev, rx_buf->dma, rx_buf->page_offset, - ICE_RXBUF_2048, DMA_FROM_DEVICE); + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); /* free resources associated with mapping */ - dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE, + dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); @@ -299,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) rx_buf->page_offset = 0; } +rx_skip_free: memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); /* Zero out the descriptor ring */ @@ -318,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) void ice_free_rx_ring(struct ice_ring *rx_ring) { ice_clean_rx_ring(rx_ring); + if (rx_ring->vsi->type == ICE_VSI_PF) + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + rx_ring->xdp_prog = NULL; devm_kfree(rx_ring->dev, rx_ring->rx_buf); rx_ring->rx_buf = NULL; @@ -362,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; + + if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); + + if (rx_ring->vsi->type == ICE_VSI_PF && + !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->q_index)) + goto err; return 0; err: @@ -371,24 +408,110 @@ err: } /** - * ice_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index + * ice_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. */ -static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +static unsigned int ice_rx_offset(struct ice_ring *rx_ring) { - rx_ring->next_to_use = val; + if (ice_ring_uses_build_skb(rx_ring)) + return ICE_SKB_PAD; + else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + return XDP_PACKET_HEADROOM; - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; + return 0; +} - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); +/** + * ice_run_xdp - Executes an XDP program on initialized xdp_buff + * @rx_ring: Rx ring + * @xdp: xdp_buff used as input to the XDP program + * @xdp_prog: XDP program to run + * + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} + */ +static int +ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + int err, result = ICE_XDP_PASS; + struct ice_ring *xdp_ring; + u32 act; + + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; + result = ice_xmit_xdp_buff(xdp, xdp_ring); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough -- not supported action */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping frame */ + case XDP_DROP: + result = ICE_XDP_CONSUMED; + break; + } + + return result; +} + +/** + * ice_xdp_xmit - submit packets to XDP ring for transmission + * @dev: netdev + * @n: number of XDP frames to be transmitted + * @frames: XDP frames to be transmitted + * @flags: transmit flags + * + * Returns number of frames successfully sent. Frames that fail are + * free'ed via XDP return API. + * For error cases, a negative errno code is returned and no-frames + * are transmitted (caller must handle freeing frames). + */ +int +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + unsigned int queue_index = smp_processor_id(); + struct ice_vsi *vsi = np->vsi; + struct ice_ring *xdp_ring; + int drops = 0, i; + + if (test_bit(__ICE_DOWN, vsi->state)) + return -ENETDOWN; + + if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) + return -ENXIO; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + xdp_ring = vsi->xdp_rings[queue_index]; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); + if (err != ICE_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + ice_xdp_ring_update_tail(xdp_ring); + + return n - drops; } /** @@ -412,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) } /* alloc new page for storage */ - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, 0); + __free_pages(page, ice_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ice_rx_offset(rx_ring); page_ref_add(page, USHRT_MAX - 1); bi->pagecnt_bias = USHRT_MAX; @@ -445,7 +568,13 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace * - * Returns false if all allocations were successful, true if any fail + * Returns false if all allocations were successful, true if any fail. Returning + * true signals to the caller that we didn't replace cleaned_count buffers and + * there is more work to do. + * + * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx + * buffers. Then bump tail at most one time. Grouping like this lets us avoid + * multiple tail writes per call. */ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) { @@ -462,13 +591,14 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) bi = &rx_ring->rx_buf[ntu]; do { + /* if we fail here, we have work remaining */ if (!ice_alloc_mapped_page(rx_ring, bi)) - goto no_bufs; + break; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - ICE_RXBUF_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -494,16 +624,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) if (rx_ring->next_to_use != ntu) ice_release_rx_desc(rx_ring, ntu); - return false; - -no_bufs: - if (rx_ring->next_to_use != ntu) - ice_release_rx_desc(rx_ring, ntu); - - /* make sure to come back via polling to try again after - * allocation failure - */ - return true; + return !!cleaned_count; } /** @@ -523,7 +644,7 @@ static bool ice_page_is_reserved(struct page *page) * Update the offset within page so that Rx buf will be ready to be reused. * For systems with PAGE_SIZE < 8192 this function will flip the page offset * so the second half of page assigned to Rx buffer will be used, otherwise - * the offset is moved by the @size bytes + * the offset is moved by "size" bytes */ static void ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) @@ -548,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) */ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) { -#if (PAGE_SIZE >= 8192) - unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; -#endif unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; @@ -563,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; #else - if (rx_buf->page_offset > last_offset) +#define ICE_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; #endif /* PAGE_SIZE < 8192) */ @@ -581,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) /** * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag + * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: buffer containing page to add * @skb: sk_buff to place the data into * @size: packet length from rx_desc @@ -590,15 +711,17 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) * The function will then update the page offset. */ static void -ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size) +ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct sk_buff *skb, unsigned int size) { #if (PAGE_SIZE >= 8192) - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring)); #else - unsigned int truesize = ICE_RXBUF_2048; + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #endif + if (!size) + return; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, rx_buf->page_offset, size, truesize); @@ -654,6 +777,8 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, prefetchw(rx_buf->page); *skb = rx_buf->skb; + if (!size) + return rx_buf; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, rx_buf->page_offset, size, @@ -666,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, } /** + * ice_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buf: Rx buffer to pull data from + * @xdp: xdp_buff pointing to the data + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. + */ +static struct sk_buff * +ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + struct sk_buff *skb; + + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points exactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); +#if L1_CACHE_BYTES < 128 + prefetch((void *)(xdp->data + L1_CACHE_BYTES)); +#endif + /* build an skb around the page buffer */ + skb = build_skb(xdp->data_hard_start, truesize); + if (unlikely(!skb)) + return NULL; + + /* must to record Rx queue, otherwise OS features such as + * symmetric queue won't work + */ + skb_record_rx_queue(skb, rx_ring->q_index); + + /* update pointers within the skb to store the data */ + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + + /* buffer is used by skb, update page_offset */ + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); + + return skb; +} + +/** * ice_construct_skb - Allocate skb and populate it * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from - * @size: the length of the packet + * @xdp: xdp_buff pointing to the data * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the @@ -677,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, */ static struct sk_buff * ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buf->page) + rx_buf->page_offset; + unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch((u8 *)va + L1_CACHE_BYTES); + prefetch((void *)(xdp->data + L1_CACHE_BYTES)); #endif /* L1_CACHE_BYTES */ /* allocate a skb to store the frags */ @@ -699,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, /* Determine available headroom for copy */ headlen = size; if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, + sizeof(long))); /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; @@ -710,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, #if (PAGE_SIZE >= 8192) unsigned int truesize = SKB_DATA_ALIGN(size); #else - unsigned int truesize = ICE_RXBUF_2048; + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #endif skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, size, truesize); @@ -732,19 +912,30 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from * - * This function will clean up the contents of the rx_buf. It will - * either recycle the buffer or unmap it and free the associated resources. + * This function will update next_to_clean and then clean up the contents + * of the rx_buf. It will either recycle the buffer or unmap it and free + * the associated resources. */ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) { - /* hand second half of page back to the ring */ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + if (!rx_buf) + return; + if (ice_can_reuse_rx_page(rx_buf)) { + /* hand second half of page back to the ring */ ice_reuse_rx_page(rx_ring, rx_buf); rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE, - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, + ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, + ICE_RX_DMA_ATTR); __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); } @@ -754,227 +945,31 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** - * ice_cleanup_headers - Correct empty headers - * @skb: pointer to current skb being fixed - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. - */ -static bool ice_cleanup_headers(struct sk_buff *skb) -{ - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * ice_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask - * - * This function does some fast chicanery in order to return the - * value of the mask which is really only used for boolean tests. - * The status_error_len doesn't need to be shifted because it begins - * at offset zero. - */ -static bool -ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) -{ - return !!(rx_desc->wb.status_error0 & - cpu_to_le16(stat_err_bits)); -} - -/** * ice_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer * @skb: Current socket buffer containing buffer in progress * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. + * If the buffer is an EOP buffer, this function exits returning false, + * otherwise return true indicating that this is in fact a non-EOP buffer. */ static bool ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(ICE_RX_DESC(rx_ring, ntc)); - /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) return false; /* place skb in next buffer to be received */ - rx_ring->rx_buf[ntc].skb = skb; + rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; } /** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - */ -static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) -{ - return PKT_HASH_TYPE_NONE; -} - -/** - * ice_rx_hash - set the hash value in the skb - * @rx_ring: descriptor ring - * @rx_desc: specific descriptor - * @skb: pointer to current skb - * @rx_ptype: the ptype value from the descriptor - */ -static void -ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 rx_ptype) -{ - struct ice_32b_rx_flex_desc_nic *nic_mdid; - u32 hash; - - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) - return; - - if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) - return; - - nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; - hash = le32_to_cpu(nic_mdid->rss_hash); - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); -} - -/** - * ice_rx_csum - Indicate in skb if checksum is good - * @vsi: the VSI we care about - * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor - * @ptype: the packet type decoded by hardware - * - * skb->protocol must be set before this function is called - */ -static void -ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, - union ice_32b_rx_flex_desc *rx_desc, u8 ptype) -{ - struct ice_rx_ptype_decoded decoded; - u32 rx_error, rx_status; - bool ipv4, ipv6; - - rx_status = le16_to_cpu(rx_desc->wb.status_error0); - rx_error = rx_status; - - decoded = ice_decode_rx_desc_ptype(ptype); - - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - - /* check if Rx checksum is enabled */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; - - /* check if HW has decoded the packet and checksum */ - if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; - - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); - - if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) - goto checksum_fail; - else if (ipv6 && (rx_status & - (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) - goto checksum_fail; - - /* check for L4 errors and handle packets that were not able to be - * checksummed due to arrival speed - */ - if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) - goto checksum_fail; - - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - default: - break; - } - return; - -checksum_fail: - vsi->back->hw_csum_rx_error++; -} - -/** - * ice_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: Rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * @ptype: the packet type decoded by hardware - * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, protocol, and - * other fields within the skb. - */ -static void -ice_process_skb_fields(struct ice_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 ptype) -{ - ice_rx_hash(rx_ring, rx_desc, skb, ptype); - - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); - - ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype); -} - -/** - * ice_receive_skb - Send a completed packet up the stack - * @rx_ring: Rx ring in play - * @skb: packet to send up - * @vlan_tag: VLAN tag for packet - * - * This function sends the completed packet (via. skb) up the stack using - * gro receive functions (with/without VLAN tag) - */ -static void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) -{ - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - napi_gro_receive(&rx_ring->q_vector->napi, skb); -} - -/** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -990,7 +985,12 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); - bool failure = false; + unsigned int xdp_res, xdp_xmit = 0; + struct bpf_prog *xdp_prog = NULL; + struct xdp_buff xdp; + bool failure; + + xdp.rxq = &rx_ring->xdp_rxq; /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { @@ -1002,13 +1002,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) u16 vlan_tag = 0; u8 rx_ptype; - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= ICE_RX_BUF_WRITE) { - failure = failure || - ice_alloc_rx_bufs(rx_ring, cleaned_count); - cleaned_count = 0; - } - /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); @@ -1030,17 +1023,67 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; + /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, &skb, size); - /* allocate (if needed) and populate skb */ - if (skb) - ice_add_rx_frag(rx_buf, skb, size); - else - skb = ice_construct_skb(rx_ring, rx_buf, size); + if (!size) { + xdp.data = NULL; + xdp.data_end = NULL; + xdp.data_hard_start = NULL; + xdp.data_meta = NULL; + goto construct_skb; + } + + xdp.data = page_address(rx_buf->page) + rx_buf->page_offset; + xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring); + xdp.data_meta = xdp.data; + xdp.data_end = xdp.data + size; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + goto construct_skb; + } + + xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); + rcu_read_unlock(); + if (!xdp_res) + goto construct_skb; + if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = ice_rx_pg_size(rx_ring) / 2; +#else + truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + + size); +#endif + xdp_xmit |= xdp_res; + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); + } else { + rx_buf->pagecnt_bias++; + } + total_rx_bytes += size; + total_rx_pkts++; + + cleaned_count++; + ice_put_rx_buf(rx_ring, rx_buf); + continue; +construct_skb: + if (skb) { + ice_add_rx_frag(rx_ring, rx_buf, skb, size); + } else if (likely(xdp.data)) { + if (ice_ring_uses_build_skb(rx_ring)) + skb = ice_build_skb(rx_ring, rx_buf, &xdp); + else + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); + } /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buf_failed++; - rx_buf->pagecnt_bias++; + if (rx_buf) + rx_buf->pagecnt_bias++; break; } @@ -1057,17 +1100,12 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) continue; } - rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & - ICE_RX_FLEX_DESC_PTYPE_M; - stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); if (ice_test_staterr(rx_desc, stat_err_bits)) vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); - /* correct empty headers and pad skb if needed (to make valid - * ethernet frame - */ - if (ice_cleanup_headers(skb)) { + /* pad the skb if needed, to make a valid ethernet frame */ + if (eth_skb_pad(skb)) { skb = NULL; continue; } @@ -1076,6 +1114,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) total_rx_bytes += skb->len; /* populate checksum, VLAN, and protocol */ + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; + ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); /* send completed skb up the stack */ @@ -1085,13 +1126,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) total_rx_pkts++; } - /* update queue and vector specific stats */ - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.pkts += total_rx_pkts; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_pkts += total_rx_pkts; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + /* return up to cleaned_count buffers to hardware */ + failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); + + if (xdp_prog) + ice_finalize_xdp_rx(rx_ring, xdp_xmit); + + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_pkts; @@ -1212,6 +1253,8 @@ ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) if (time_after(next_update, rc->next_update)) goto clear_counts; + prefetch(q_vector->vsi->port_info); + packets = rc->total_pkts; bytes = rc->total_bytes; @@ -1341,16 +1384,32 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) /** * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt - * @vsi: the VSI associated with the q_vector * @q_vector: q_vector for which ITR is being updated and interrupt enabled */ -static void -ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +static void ice_update_ena_itr(struct ice_q_vector *q_vector) { struct ice_ring_container *tx = &q_vector->tx; struct ice_ring_container *rx = &q_vector->rx; + struct ice_vsi *vsi = q_vector->vsi; u32 itr_val; + /* when exiting WB_ON_ITR lets set a low ITR value and trigger + * interrupts to expire right away in case we have more work ready to go + * already + */ + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) { + itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS); + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); + /* set target back to last user set value */ + rx->target_itr = rx->itr_setting; + /* set current to what we just wrote and dynamic if needed */ + rx->current_itr = ICE_WB_ON_ITR_USECS | + (rx->itr_setting & ICE_ITR_DYNAMIC); + /* allow normal interrupt flow to start */ + q_vector->itr_countdown = 0; + return; + } + /* This will do nothing if dynamic updates are not enabled */ ice_update_itr(q_vector, tx); ice_update_itr(q_vector, rx); @@ -1389,13 +1448,48 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, vsi->state)) - wr32(&vsi->back->hw, + if (!test_bit(__ICE_DOWN, q_vector->vsi->state)) + wr32(&q_vector->vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** + * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector + * @q_vector: q_vector to set WB_ON_ITR on + * + * We need to tell hardware to write-back completed descriptors even when + * interrupts are disabled. Descriptors will be written back on cache line + * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR + * descriptors may not be written back if they don't fill a cache line until the + * next interrupt. + * + * This sets the write-back frequency to 2 microseconds as that is the minimum + * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to + * make sure hardware knows we aren't meddling with the INTENA_M bit. + */ +static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) +{ + struct ice_vsi *vsi = q_vector->vsi; + + /* already in WB_ON_ITR mode no need to change it */ + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) + return; + + if (q_vector->num_ring_rx) + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), + ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, + ICE_RX_ITR)); + + if (q_vector->num_ring_tx) + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), + ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, + ICE_TX_ITR)); + + q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; +} + +/** * ice_napi_poll - NAPI polling Rx/Tx cleanup routine * @napi: napi struct with our devices info in it * @budget: amount of work driver is allowed to do this pass, in packets @@ -1408,34 +1502,48 @@ int ice_napi_poll(struct napi_struct *napi, int budget) { struct ice_q_vector *q_vector = container_of(napi, struct ice_q_vector, napi); - struct ice_vsi *vsi = q_vector->vsi; - struct ice_pf *pf = vsi->back; bool clean_complete = true; - int budget_per_ring = 0; struct ice_ring *ring; + int budget_per_ring; int work_done = 0; /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ - ice_for_each_ring(ring, q_vector->tx) - if (!ice_clean_tx_irq(vsi, ring, budget)) + ice_for_each_ring(ring, q_vector->tx) { + bool wd = ring->xsk_umem ? + ice_clean_tx_irq_zc(ring, budget) : + ice_clean_tx_irq(ring, budget); + + if (!wd) clean_complete = false; + } /* Handle case where we are called by netpoll with a budget of 0 */ - if (budget <= 0) + if (unlikely(budget <= 0)) return budget; - /* We attempt to distribute budget to each Rx queue fairly, but don't - * allow the budget to go below 1 because that would exit polling early. - */ - if (q_vector->num_ring_rx) + /* normally we have 1 Rx ring per q_vector */ + if (unlikely(q_vector->num_ring_rx > 1)) + /* We attempt to distribute budget to each Rx queue fairly, but + * don't allow the budget to go below 1 because that would exit + * polling early. + */ budget_per_ring = max(budget / q_vector->num_ring_rx, 1); + else + /* Max of 1 Rx ring in this q_vector so give it the budget */ + budget_per_ring = budget; ice_for_each_ring(ring, q_vector->rx) { int cleaned; - cleaned = ice_clean_rx_irq(ring, budget_per_ring); + /* A dedicated path for zero-copy allows making a single + * comparison in the irq context instead of many inside the + * ice_clean_rx_irq function and makes the codebase cleaner. + */ + cleaned = ring->xsk_umem ? + ice_clean_rx_irq_zc(ring, budget_per_ring) : + ice_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) @@ -1450,23 +1558,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * poll us due to busy-polling */ if (likely(napi_complete_done(napi, work_done))) - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - ice_update_ena_itr(vsi, q_vector); + ice_update_ena_itr(q_vector); + else + ice_set_wb_on_itr(q_vector); return min_t(int, work_done, budget - 1); } -/* helper function for building cmd/type/offset */ -static __le64 -build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) -{ - return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | - (td_cmd << ICE_TXD_QW1_CMD_S) | - (td_offset << ICE_TXD_QW1_OFFSET_S) | - ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | - (td_tag << ICE_TXD_QW1_L2TAG1_S)); -} - /** * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions * @tx_ring: the ring to be checked @@ -1521,11 +1619,11 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, { u64 td_offset, td_tag, td_cmd; u16 i = tx_ring->next_to_use; - struct skb_frag_struct *frag; unsigned int data_len, size; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; struct sk_buff *skb; + skb_frag_t *frag; dma_addr_t dma; td_tag = off->td_l2tag1; @@ -1618,9 +1716,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, i = 0; /* write last descriptor with RS and EOP bits */ - td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag); + td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD; + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size, + td_tag); /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. @@ -1638,9 +1736,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, ice_maybe_stop_tx(tx_ring, DESC_NEEDED); /* notify HW of packet */ - if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) writel(i, tx_ring->tail); - } return; @@ -1828,6 +1925,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u64 cd_mss, cd_tso_len; @@ -1861,10 +1959,18 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) /* remove payload length from checksum */ paylen = skb->len - l4_start; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - /* compute length of segmentation header */ - off->header_len = (l4.tcp->doff * 4) + l4_start; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of UDP segmentation header */ + off->header_len = sizeof(l4.udp) + l4_start; + } else { + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + /* compute length of TCP segmentation header */ + off->header_len = (l4.tcp->doff * 4) + l4_start; + } /* update gso_segs and bytecount */ first->gso_segs = skb_shinfo(skb)->gso_segs; @@ -1923,7 +2029,7 @@ static unsigned int ice_txd_use_count(unsigned int size) */ static unsigned int ice_xmit_desc_count(struct sk_buff *skb) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; unsigned int nr_frags = skb_shinfo(skb)->nr_frags; unsigned int count = 0, size = skb_headlen(skb); @@ -1954,7 +2060,7 @@ static unsigned int ice_xmit_desc_count(struct sk_buff *skb) */ static bool __ice_chk_linearize(struct sk_buff *skb) { - const struct skb_frag_struct *frag, *stale; + const skb_frag_t *frag, *stale; int nr_frags, sum; /* no need to check if number of frags is less than 7 */ @@ -1969,7 +2075,7 @@ static bool __ice_chk_linearize(struct sk_buff *skb) frag = &skb_shinfo(skb)->frags[0]; /* Initialize size to the negative value of gso_size minus 1. We - * use this as the worst case scenerio in which the frag ahead + * use this as the worst case scenario in which the frag ahead * of us only provides one byte which is why we are limited to 6 * descriptors for a single transmit as the header and previous * fragment are already consuming 2 descriptors. @@ -2036,6 +2142,7 @@ static netdev_tx_t ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) { struct ice_tx_offload_params offload = { 0 }; + struct ice_vsi *vsi = tx_ring->vsi; struct ice_tx_buf *first; unsigned int count; int tso, csum; @@ -2083,7 +2190,15 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) if (csum < 0) goto out_drop; - if (tso || offload.cd_tunnel_params) { + /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ + if (unlikely(skb->priority == TC_PRIO_CONTROL && + vsi->type == ICE_VSI_PF && + vsi->port_info->is_sw_lldp)) + offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + ICE_TX_CTX_DESC_SWTCH_UPLINK << + ICE_TXD_CTX_QW1_CMD_S); + + if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { struct ice_tx_ctx_desc *cdesc; int i = tx_ring->next_to_use; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ec76aba347b9..14a1bf445889 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -4,8 +4,12 @@ #ifndef _ICE_TXRX_H_ #define _ICE_TXRX_H_ +#include "ice_type.h" + #define ICE_DFLT_IRQ_WORK 256 +#define ICE_RXBUF_3072 3072 #define ICE_RXBUF_2048 2048 +#define ICE_RXBUF_1536 1536 #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 @@ -22,6 +26,71 @@ #define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 +/* Attempt to maximize the headroom available for incoming frames. We use a 2K + * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame. + * This leaves us with 512 bytes of room. From that we need to deduct the + * space needed for the shared info and the padding needed to IP align the + * frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. In these cases we should fall back to the legacy + * receive path. + */ +#if (PAGE_SIZE < 8192) +#define ICE_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) + +/** + * ice_compute_pad - compute the padding + * rx_buf_len: buffer length + * + * Figure out the size of half page based on given buffer length and + * then subtract the skb_shared_info followed by subtraction of the + * actual buffer length; this in turn results in the actual space that + * is left for padding usage + */ +static inline int ice_compute_pad(int rx_buf_len) +{ + int half_page_size; + + half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len; +} + +/** + * ice_skb_pad - determine the padding that we can supply + * + * Figure out the right Rx buffer size and based on that calculate the + * padding + */ +static inline int ice_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. + */ + if (ICE_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = ICE_RXBUF_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return ice_compute_pad(rx_buf_len); +} + +#define ICE_SKB_PAD ice_skb_pad() +#else +#define ICE_2K_TOO_SMALL_WITH_PADDING false +#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#endif + /* We are assuming that the cache line is always 64 Bytes here for ice. * In order to make sure that is a correct assumption there is a check in probe * to print a warning if the read from GLPCI_CNF2 tells us that the cache line @@ -49,12 +118,24 @@ #define ICE_TX_FLAGS_VLAN_PR_S 29 #define ICE_TX_FLAGS_VLAN_S 16 +#define ICE_XDP_PASS 0 +#define ICE_XDP_CONSUMED BIT(0) +#define ICE_XDP_TX BIT(1) +#define ICE_XDP_REDIR BIT(2) + #define ICE_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + +#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) + struct ice_tx_buf { struct ice_tx_desc *next_to_watch; - struct sk_buff *skb; + union { + struct sk_buff *skb; + void *raw_buf; /* used for XDP */ + }; unsigned int bytecount; unsigned short gso_segs; u32 tx_flags; @@ -76,9 +157,17 @@ struct ice_tx_offload_params { struct ice_rx_buf { struct sk_buff *skb; dma_addr_t dma; - struct page *page; - unsigned int page_offset; - u16 pagecnt_bias; + union { + struct { + struct page *page; + unsigned int page_offset; + u16 pagecnt_bias; + }; + struct { + void *addr; + u64 handle; + }; + }; }; struct ice_q_stats { @@ -144,6 +233,19 @@ enum ice_rx_dtype { #define ICE_DFLT_INTRL 0 #define ICE_MAX_INTRL 236 +#define ICE_WB_ON_ITR_USECS 2 +#define ICE_IN_WB_ON_ITR_MODE 255 +/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows + * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also, + * set the write-back latency to the usecs passed in. + */ +#define ICE_GLINT_DYN_CTL_WB_ON_ITR(usecs, itr_idx) \ + ((((usecs) << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)) & \ + GLINT_DYN_CTL_INTERVAL_M) | \ + (((itr_idx) << GLINT_DYN_CTL_ITR_INDX_S) & \ + GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | \ + GLINT_DYN_CTL_WB_ON_ITR_M) + /* Legacy or Advanced Mode Queue */ #define ICE_TX_ADVANCED 0 #define ICE_TX_LEGACY 1 @@ -185,18 +287,44 @@ struct ice_ring { }; struct rcu_head rcu; /* to avoid race on free */ + struct bpf_prog *xdp_prog; + struct xdp_umem *xsk_umem; + struct zero_copy_allocator zca; + /* CL3 - 3rd cacheline starts here */ + struct xdp_rxq_info xdp_rxq; /* CLX - the below items are only accessed infrequently and should be * in their own cache line if possible */ +#define ICE_TX_FLAGS_RING_XDP BIT(0) +#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) + u8 flags; dma_addr_t dma; /* physical address of ring */ unsigned int size; /* length of descriptor ring in bytes */ u32 txq_teid; /* Added Tx queue TEID */ u16 rx_buf_len; -#ifdef CONFIG_DCB u8 dcb_tc; /* Traffic class of ring */ -#endif /* CONFIG_DCB */ } ____cacheline_internodealigned_in_smp; +static inline bool ice_ring_uses_build_skb(struct ice_ring *ring) +{ + return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); +} + +static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring) +{ + ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; +} + +static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring) +{ + ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; +} + +static inline bool ice_ring_is_xdp(struct ice_ring *ring) +{ + return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); +} + struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; @@ -213,10 +341,29 @@ struct ice_ring_container { u16 itr_setting; }; +struct ice_coalesce_stored { + u16 itr_tx; + u16 itr_rx; + u8 intrl; +}; + /* iterator for handling rings in ring container */ #define ice_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) +static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len > (PAGE_SIZE / 2)) + return 1; +#endif + return 0; +} + +#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring)) + +union ice_32b_rx_flex_desc; + bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); void ice_clean_tx_ring(struct ice_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c new file mode 100644 index 000000000000..6da048a6ca7c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_txrx_lib.h" + +/** + * ice_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +{ + u16 prev_ntu = rx_ring->next_to_use & ~0x7; + + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; + + /* QRX_TAIL will be updated with any tail value, but hardware ignores + * the lower 3 bits. This makes it so we only bump tail on meaningful + * boundaries. Also, this allows us to bump tail on intervals of 8 up to + * the budget depending on the current traffic load. + */ + val &= ~0x7; + if (prev_ntu != val) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(val, rx_ring->tail); + } +} + +/** + * ice_ptype_to_htype - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + */ +static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) +{ + return PKT_HASH_TYPE_NONE; +} + +/** + * ice_rx_hash - set the hash value in the skb + * @rx_ring: descriptor ring + * @rx_desc: specific descriptor + * @skb: pointer to current skb + * @rx_ptype: the ptype value from the descriptor + */ +static void +ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 rx_ptype) +{ + struct ice_32b_rx_flex_desc_nic *nic_mdid; + u32 hash; + + if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + return; + + if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) + return; + + nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + hash = le32_to_cpu(nic_mdid->rss_hash); + skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +} + +/** + * ice_rx_csum - Indicate in skb if checksum is good + * @ring: the ring we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void +ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, + union ice_32b_rx_flex_desc *rx_desc, u8 ptype) +{ + struct ice_rx_ptype_decoded decoded; + u32 rx_error, rx_status; + bool ipv4, ipv6; + + rx_status = le16_to_cpu(rx_desc->wb.status_error0); + rx_error = rx_status; + + decoded = ice_decode_rx_desc_ptype(ptype); + + /* Start with CHECKSUM_NONE and by default csum_level = 0 */ + skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + + /* check if Rx checksum is enabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) + return; + + if (!(decoded.known && decoded.outer_ip)) + return; + + ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) + goto checksum_fail; + else if (ipv6 && (rx_status & + (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) + goto checksum_fail; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) + goto checksum_fail; + + /* Only report checksum unnecessary for TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + case ICE_RX_PTYPE_INNER_PROT_UDP: + case ICE_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + default: + break; + } + return; + +checksum_fail: + ring->vsi->back->hw_csum_rx_error++; +} + +/** + * ice_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: Rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @ptype: the packet type decoded by hardware + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +void +ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype) +{ + ice_rx_hash(rx_ring, rx_desc, skb, ptype); + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + ice_rx_csum(rx_ring, skb, rx_desc, ptype); +} + +/** + * ice_receive_skb - Send a completed packet up the stack + * @rx_ring: Rx ring in play + * @skb: packet to send up + * @vlan_tag: VLAN tag for packet + * + * This function sends the completed packet (via. skb) up the stack using + * gro receive functions (with/without VLAN tag) + */ +void +ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) +{ + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + napi_gro_receive(&rx_ring->q_vector->napi, skb); +} + +/** + * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission + * @data: packet data pointer + * @size: packet data size + * @xdp_ring: XDP ring for transmission + */ +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) +{ + u16 i = xdp_ring->next_to_use; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + dma_addr_t dma; + + if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + return ICE_XDP_CONSUMED; + } + + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return ICE_XDP_CONSUMED; + + tx_buf = &xdp_ring->tx_buf[i]; + tx_buf->bytecount = size; + tx_buf->gso_segs = 1; + tx_buf->raw_buf = data; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + tx_desc = ICE_TX_DESC(xdp_ring, i); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0, + size, 0); + + /* Make certain all of the status bits have been updated + * before next_to_watch is written. + */ + smp_wmb(); + + i++; + if (i == xdp_ring->count) + i = 0; + + tx_buf->next_to_watch = tx_desc; + xdp_ring->next_to_use = i; + + return ICE_XDP_TX; +} + +/** + * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it + * @xdp: XDP buffer + * @xdp_ring: XDP Tx ring + * + * Returns negative on failure, 0 on success. + */ +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) +{ + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + + if (unlikely(!xdpf)) + return ICE_XDP_CONSUMED; + + return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); +} + +/** + * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map + * @rx_ring: Rx ring + * @xdp_res: Result of the receive batch + * + * This function bumps XDP Tx tail and/or flush redirect map, and + * should be called when a batch of packets has been processed in the + * napi loop. + */ +void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res) +{ + if (xdp_res & ICE_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_res & ICE_XDP_TX) { + struct ice_ring *xdp_ring = + rx_ring->vsi->xdp_rings[rx_ring->q_index]; + + ice_xdp_ring_update_tail(xdp_ring); + } +} diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h new file mode 100644 index 000000000000..ba9164dad9ae --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_TXRX_LIB_H_ +#define _ICE_TXRX_LIB_H_ +#include "ice.h" + +/** + * ice_test_staterr - tests bits in Rx descriptor status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_len doesn't need to be shifted because it begins + * at offset zero. + */ +static inline bool +ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) +{ + return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits)); +} + +static inline __le64 +build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) +{ + return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | + (td_cmd << ICE_TXD_QW1_CMD_S) | + (td_offset << ICE_TXD_QW1_OFFSET_S) | + ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << ICE_TXD_QW1_L2TAG1_S)); +} + +/** + * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register + * @xdp_ring: XDP Tx ring + * + * This function updates the XDP Tx ring tail register. + */ +static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) +{ + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); +} + +void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); +void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val); +void +ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype); +void +ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); +#endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 24bbef8bbe69..db0ef6ba907f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -4,18 +4,31 @@ #ifndef _ICE_TYPE_H_ #define _ICE_TYPE_H_ +#define ICE_BYTES_PER_WORD 2 +#define ICE_BYTES_PER_DWORD 4 + #include "ice_status.h" #include "ice_hw_autogen.h" #include "ice_osdep.h" #include "ice_controlq.h" #include "ice_lan_tx_rx.h" +#include "ice_flex_type.h" +#include "ice_protocol_type.h" -#define ICE_BYTES_PER_WORD 2 -#define ICE_BYTES_PER_DWORD 4 +static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) +{ + return test_bit(tc, &bitmap); +} -static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) +static inline u64 round_up_64bit(u64 a, u32 b) { - return test_bit(tc, (unsigned long *)&bitmap); + return div64_long(((a) + (b) / 2), (b)); +} + +static inline u32 ice_round_to_num(u32 N, u32 R) +{ + return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) : + ((((N) + (R) - 1) / (R)) * (R))); } /* Driver always calls main vsi_handle first */ @@ -29,10 +42,14 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) #define ICE_DBG_QCTX BIT_ULL(6) #define ICE_DBG_NVM BIT_ULL(7) #define ICE_DBG_LAN BIT_ULL(8) +#define ICE_DBG_FLOW BIT_ULL(9) #define ICE_DBG_SW BIT_ULL(13) #define ICE_DBG_SCHED BIT_ULL(14) +#define ICE_DBG_PKG BIT_ULL(16) #define ICE_DBG_RES BIT_ULL(17) #define ICE_DBG_AQ_MSG BIT_ULL(24) +#define ICE_DBG_AQ_DESC BIT_ULL(25) +#define ICE_DBG_AQ_DESC_BUF BIT_ULL(26) #define ICE_DBG_AQ_CMD BIT_ULL(27) #define ICE_DBG_USER BIT_ULL(31) @@ -53,6 +70,14 @@ enum ice_aq_res_access_type { ICE_RES_WRITE }; +struct ice_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 driver_string[32]; +}; + enum ice_fc_mode { ICE_FC_NONE = 0, ICE_FC_RX_PAUSE, @@ -139,6 +164,9 @@ struct ice_phy_info { /* Common HW capabilities for SW use */ struct ice_hw_common_caps { u32 valid_functions; + /* DCB capabilities */ + u32 active_tc_bitmap; + u32 maxtc; /* Tx/Rx queues */ u16 num_rxq; /* Number/Total Rx queues */ @@ -176,6 +204,7 @@ struct ice_hw_dev_caps { struct ice_hw_common_caps common_cap; u32 num_vfs_exposed; /* Total number of VFs exposed */ u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ + u32 num_funcs; }; /* MAC info */ @@ -219,6 +248,8 @@ struct ice_nvm_info { u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; +#define ICE_NVM_VER_LEN 32 + /* Max number of port to queue branches w.r.t topology */ #define ICE_MAX_TRAFFIC_CLASS 8 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS @@ -257,10 +288,56 @@ enum ice_agg_type { ICE_AGG_TYPE_QG }; +/* Rate limit types */ +enum ice_rl_type { + ICE_UNKNOWN_BW = 0, + ICE_MIN_BW, /* for CIR profile */ + ICE_MAX_BW, /* for EIR profile */ + ICE_SHARED_BW /* for shared profile */ +}; + +#define ICE_SCHED_MIN_BW 500 /* in Kbps */ +#define ICE_SCHED_MAX_BW 100000000 /* in Kbps */ +#define ICE_SCHED_DFLT_BW 0xFFFFFFFF /* unlimited */ #define ICE_SCHED_DFLT_RL_PROF_ID 0 +#define ICE_SCHED_NO_SHARED_RL_PROF_ID 0xFFFF #define ICE_SCHED_DFLT_BW_WT 1 +#define ICE_SCHED_INVAL_PROF_ID 0xFFFF +#define ICE_SCHED_DFLT_BURST_SIZE (15 * 1024) /* in bytes (15k) */ -/* VSI type list entry to locate corresponding VSI/ag nodes */ + /* Data structure for saving BW information */ +enum ice_bw_type { + ICE_BW_TYPE_PRIO, + ICE_BW_TYPE_CIR, + ICE_BW_TYPE_CIR_WT, + ICE_BW_TYPE_EIR, + ICE_BW_TYPE_EIR_WT, + ICE_BW_TYPE_SHARED, + ICE_BW_TYPE_CNT /* This must be last */ +}; + +struct ice_bw { + u32 bw; + u16 bw_alloc; +}; + +struct ice_bw_type_info { + DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT); + u8 generic; + struct ice_bw cir_bw; + struct ice_bw eir_bw; + u32 shared_bw; +}; + +/* VSI queue context structure for given TC */ +struct ice_q_ctx { + u16 q_handle; + u32 q_teid; + /* bw_t_info saves queue BW information */ + struct ice_bw_type_info bw_t_info; +}; + +/* VSI type list entry to locate corresponding VSI/aggregator nodes */ struct ice_sched_vsi_info { struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS]; struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; @@ -347,6 +424,10 @@ struct ice_port_info { struct ice_mac_info mac; struct ice_phy_info phy; struct mutex sched_lock; /* protect access to TXSched tree */ + struct ice_sched_node * + sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM]; + /* List contain profile ID(s) and other params per layer */ + struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM]; struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */ /* DCBX info */ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */ @@ -398,6 +479,8 @@ struct ice_hw { u8 pf_id; /* device profile info */ + u16 max_burst_size; /* driver sets this value */ + /* Tx Scheduler values */ u16 num_tx_sched_layers; u16 num_tx_sched_phys_layers; @@ -434,7 +517,7 @@ struct ice_hw { struct ice_fw_log_cfg fw_log; /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL - * register. Used for determining the ITR/intrl granularity during + * register. Used for determining the ITR/INTRL granularity during * initialization. */ #define ICE_MAX_AGG_BW_200G 0x0 @@ -454,6 +537,34 @@ struct ice_hw { u8 ucast_shared; /* true if VSIs can share unicast addr */ + /* Active package version (currently active) */ + struct ice_pkg_ver active_pkg_ver; + u8 active_pkg_name[ICE_PKG_NAME_SIZE]; + u8 active_pkg_in_nvm; + + enum ice_aq_err pkg_dwnld_status; + + /* Driver's package ver - (from the Metadata seg) */ + struct ice_pkg_ver pkg_ver; + u8 pkg_name[ICE_PKG_NAME_SIZE]; + + /* Driver's Ice package version (from the Ice seg) */ + struct ice_pkg_ver ice_pkg_ver; + u8 ice_pkg_name[ICE_PKG_NAME_SIZE]; + + /* Pointer to the ice segment */ + struct ice_seg *seg; + + /* Pointer to allocated copy of pkg memory */ + u8 *pkg_copy; + u32 pkg_size; + + /* HW block tables */ + struct ice_blk_info blk[ICE_BLK_COUNT]; + struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ + struct list_head fl_profs[ICE_BLK_COUNT]; + struct mutex rss_locks; /* protect RSS configuration */ + struct list_head rss_list_head; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ @@ -514,6 +625,8 @@ struct ice_hw_port_stats { }; /* Checksum and Shadow RAM pointers */ +#define ICE_SR_BOOT_CFG_PTR 0x132 +#define ICE_NVM_OEM_VER_OFF 0x02 #define ICE_SR_NVM_DEV_STARTER_VER 0x18 #define ICE_SR_NVM_EETRACK_LO 0x2D #define ICE_SR_NVM_EETRACK_HI 0x2E @@ -527,6 +640,7 @@ struct ice_hw_port_stats { #define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT) #define ICE_OEM_VER_SHIFT 24 #define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT) +#define ICE_SR_PFA_PTR 0x40 #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 #define ICE_SR_WORDS_IN_1KB 512 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 5d24b539648f..262714d5f54a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2,37 +2,36 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" /** - * ice_err_to_virt err - translate errors for VF return code - * @ice_err: error return code - */ -static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err) -{ - switch (ice_err) { - case ICE_SUCCESS: - return VIRTCHNL_STATUS_SUCCESS; - case ICE_ERR_BAD_PTR: - case ICE_ERR_INVAL_SIZE: - case ICE_ERR_DEVICE_NOT_SUPPORTED: - case ICE_ERR_PARAM: - case ICE_ERR_CFG: - return VIRTCHNL_STATUS_ERR_PARAM; - case ICE_ERR_NO_MEMORY: - return VIRTCHNL_STATUS_ERR_NO_MEMORY; - case ICE_ERR_NOT_READY: - case ICE_ERR_RESET_FAILED: - case ICE_ERR_FW_API_VER: - case ICE_ERR_AQ_ERROR: - case ICE_ERR_AQ_TIMEOUT: - case ICE_ERR_AQ_FULL: - case ICE_ERR_AQ_NO_WORK: - case ICE_ERR_AQ_EMPTY: - return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; - default: - return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + * ice_validate_vf_id - helper to check if VF ID is valid + * @pf: pointer to the PF structure + * @vf_id: the ID of the VF to check + */ +static int ice_validate_vf_id(struct ice_pf *pf, int vf_id) +{ + if (vf_id >= pf->num_alloc_vfs) { + dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id); + return -EINVAL; + } + return 0; +} + +/** + * ice_check_vf_init - helper to check if VF init complete + * @pf: pointer to the PF structure + * @vf: the pointer to the VF to check + */ +static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) +{ + if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n", + vf->vf_id); + return -EBUSY; } + return 0; } /** @@ -48,10 +47,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) @@ -91,26 +91,6 @@ ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe, } /** - * ice_set_pfe_link_forced - Force the virtchnl_pf_event link speed/status - * @vf: pointer to the VF structure - * @pfe: pointer to the virtchnl_pf_event to set link speed/status for - * @link_up: whether or not to set the link up/down - */ -static void -ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe, - bool link_up) -{ - u16 link_speed; - - if (link_up) - link_speed = ICE_AQ_LINK_SPEED_100GB; - else - link_speed = ICE_AQ_LINK_SPEED_UNKNOWN; - - ice_set_pfe_link(vf, pfe, link_speed, link_up); -} - -/** * ice_vc_notify_vf_link_state - Inform a VF of link status * @vf: pointer to the VF structure * @@ -129,11 +109,18 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; - if (vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info & - ICE_AQ_LINK_UP); + /* Always report link is down if the VF queues aren't enabled */ + if (!vf->num_qs_ena) { + ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false); + } else if (vf->link_forced) { + u16 link_speed = vf->link_up ? + ls->link_speed : ICE_AQ_LINK_SPEED_UNKNOWN; + + ice_set_pfe_link(vf, &pfe, link_speed, vf->link_up); + } else { + ice_set_pfe_link(vf, &pfe, ls->link_speed, + ls->link_info & ICE_AQ_LINK_UP); + } ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, @@ -181,12 +168,14 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; int first, last, v; struct ice_hw *hw; hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; + dev = ice_pf_to_dev(pf); wr32(hw, VPINT_ALLOC(vf->vf_id), 0); wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); @@ -205,14 +194,12 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0); else - dev_err(&pf->pdev->dev, - "Scattered mode for VF Tx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0); else - dev_err(&pf->pdev->dev, - "Scattered mode for VF Rx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); } /** @@ -252,11 +239,41 @@ static int ice_sriov_free_msix_res(struct ice_pf *pf) } /** + * ice_set_vf_state_qs_dis - Set VF queues state to disabled + * @vf: pointer to the VF structure + */ +void ice_set_vf_state_qs_dis(struct ice_vf *vf) +{ + /* Clear Rx/Tx enabled queues flag */ + bitmap_zero(vf->txq_ena, ICE_MAX_BASE_QS_PER_VF); + bitmap_zero(vf->rxq_ena, ICE_MAX_BASE_QS_PER_VF); + vf->num_qs_ena = 0; + clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); +} + +/** + * ice_dis_vf_qs - Disable the VF queues + * @vf: pointer to the VF structure + */ +static void ice_dis_vf_qs(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + + vsi = pf->vsi[vf->lan_vsi_idx]; + + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); + ice_vsi_stop_rx_rings(vsi); + ice_set_vf_state_qs_dis(vf); +} + +/** * ice_free_vfs - Free all VFs * @pf: pointer to the PF structure */ void ice_free_vfs(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int tmp, i; @@ -267,19 +284,9 @@ void ice_free_vfs(struct ice_pf *pf) usleep_range(1000, 2000); /* Avoid wait time by stopping all VFs at the same time */ - for (i = 0; i < pf->num_alloc_vfs; i++) { - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states)) - continue; - - vsi = pf->vsi[pf->vf[i].lan_vsi_idx]; - /* stop rings without wait time */ - ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, i); - ice_vsi_stop_rx_rings(vsi); - - clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states); - } + ice_for_each_vf(pf, i) + if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states)) + ice_dis_vf_qs(&pf->vf[i]); /* Disable IOV before freeing resources. This lets any VF drivers * running in the host get themselves cleaned up before we yank @@ -288,31 +295,24 @@ void ice_free_vfs(struct ice_pf *pf) if (!pci_vfs_assigned(pf->pdev)) pci_disable_sriov(pf->pdev); else - dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); + dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); tmp = pf->num_alloc_vfs; pf->num_vf_qps = 0; pf->num_alloc_vfs = 0; for (i = 0; i < tmp; i++) { if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { - /* disable VF qp mappings */ + /* disable VF qp mappings and set VF disable state */ ice_dis_vf_mappings(&pf->vf[i]); - - /* Set this state so that assigned VF vectors can be - * reclaimed by PF for reuse in ice_vsi_release(). No - * need to clear this bit since pf->vf array is being - * freed anyways after this for loop - */ - set_bit(ICE_VF_STATE_CFG_INTR, pf->vf[i].vf_states); + set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); ice_free_vf_res(&pf->vf[i]); } } if (ice_sriov_free_msix_res(pf)) - dev_err(&pf->pdev->dev, - "Failed to free MSIX resources used by SR-IOV\n"); + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - devm_kfree(&pf->pdev->dev, pf->vf); + devm_kfree(dev, pf->vf); pf->vf = NULL; /* This check is for when the driver is unloaded while VFs are @@ -341,18 +341,21 @@ void ice_free_vfs(struct ice_pf *pf) * ice_trigger_vf_reset - Reset a VF on HW * @vf: pointer to the VF structure * @is_vflr: true if VFLR was issued, false if not + * @is_pfr: true if the reset was triggered due to a previous PFR * * Trigger hardware to start a reset for a particular VF. Expects the caller * to wait the proper amount of time to allow hardware to reset the VF before * it cleans up and restores VF functionality. */ -static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr) +static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) { struct ice_pf *pf = vf->pf; u32 reg, reg_idx, bit_idx; + struct device *dev; struct ice_hw *hw; int vf_abs_id, i; + dev = ice_pf_to_dev(pf); hw = &pf->hw; vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id; @@ -367,10 +370,13 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr) */ clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - /* Clear the VF's ARQLEN register. This is how the VF detects reset, - * since the VFGEN_RSTAT register doesn't stick at 0 after reset. + /* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it + * in the case of VFR. If this is done for PFR, it can mess up VF + * resets because the VF driver may already have started cleanup + * by the time we get here. */ - wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0); + if (!is_pfr) + wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0); /* In the case of a VFLR, the HW has already reset the VF and we * just need to clean up, so don't hit the VFRTRIG register. @@ -389,12 +395,14 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr) wr32(hw, PF_PCI_CIAA, VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S)); - for (i = 0; i < 100; i++) { + for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) { reg = rd32(hw, PF_PCI_CIAD); - if ((reg & VF_TRANS_PENDING_M) != 0) - dev_err(&pf->pdev->dev, - "VF %d PCI transactions stuck\n", vf->vf_id); - udelay(1); + /* no transactions pending so stop polling */ + if ((reg & VF_TRANS_PENDING_M) == 0) + break; + + dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id); + udelay(ICE_PCI_CIAD_WAIT_DELAY_US); } } @@ -435,13 +443,12 @@ static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt) */ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -453,7 +460,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n", + dev_info(ice_pf_to_dev(vsi->back), "update VSI for port VLAN failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -461,7 +468,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) vsi->info = ctxt->info; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -481,19 +488,20 @@ ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id) } /** - * ice_calc_vf_first_vector_idx - Calculate absolute MSIX vector index in HW + * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space * @pf: pointer to PF structure * @vf: pointer to VF that the first MSIX vector index is being calculated for * - * This returns the first MSIX vector index in HW that is used by this VF and - * this will always be the OICR index in the AVF driver so any functionality + * This returns the first MSIX vector index in PF space that is used by this VF. + * This index is used when accessing PF relative registers such as + * GLINT_VECT2FUNC and GLINT_DYN_CTL. + * This will always be the OICR index in the AVF driver so any functionality * using vf->first_vector_idx for queue configuration will have to increment by * 1 to avoid meddling with the OICR index. */ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) { - return pf->hw.func_caps.common_cap.msix_vector_first_id + - pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix; + return pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix; } /** @@ -508,14 +516,16 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) LIST_HEAD(tmp_add_list); u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; + struct device *dev; int status = 0; + dev = ice_pf_to_dev(pf); /* first vector index is the VFs OICR index */ vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); if (!vsi) { - dev_err(&pf->pdev->dev, "Failed to create VF VSI\n"); + dev_err(dev, "Failed to create VF VSI\n"); return -ENOMEM; } @@ -543,7 +553,9 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) status = ice_add_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, "could not add mac filters\n"); + dev_err(dev, "could not add mac filters error %d\n", status); + else + vf->num_mac = 1; /* Clear this bit after VF initialization since we shouldn't reclaim * and reassign interrupts for synchronous or asynchronous VFR events. @@ -551,9 +563,8 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) * expect vector assignment to be changed unless there is a request for * more vectors. */ - clear_bit(ICE_VF_STATE_CFG_INTR, vf->vf_states); ice_alloc_vsi_res_exit: - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return status; } @@ -567,20 +578,21 @@ static int ice_alloc_vf_res(struct ice_vf *vf) int tx_rx_queue_left; int status; - /* setup VF VSI and necessary resources */ - status = ice_alloc_vsi_res(vf); - if (status) - goto ice_alloc_vf_res_exit; - /* Update number of VF queues, in case VF had requested for queue * changes */ - tx_rx_queue_left = min_t(int, pf->q_left_tx, pf->q_left_rx); + tx_rx_queue_left = min_t(int, ice_get_avail_txq_count(pf), + ice_get_avail_rxq_count(pf)); tx_rx_queue_left += ICE_DFLT_QS_PER_VF; if (vf->num_req_qs && vf->num_req_qs <= tx_rx_queue_left && vf->num_req_qs != vf->num_vf_qs) vf->num_vf_qs = vf->num_req_qs; + /* setup VF VSI and necessary resources */ + status = ice_alloc_vsi_res(vf); + if (status) + goto ice_alloc_vf_res_exit; + if (vf->trusted) set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); else @@ -605,27 +617,32 @@ ice_alloc_vf_res_exit: */ static void ice_ena_vf_mappings(struct ice_vf *vf) { + int abs_vf_id, abs_first, abs_last; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; int first, last, v; struct ice_hw *hw; - int abs_vf_id; u32 reg; + dev = ice_pf_to_dev(pf); hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; first = vf->first_vector_idx; last = (first + pf->num_vf_msix) - 1; + abs_first = first + pf->hw.func_caps.common_cap.msix_vector_first_id; + abs_last = (abs_first + pf->num_vf_msix) - 1; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; /* VF Vector allocation */ - reg = (((first << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M) | - ((last << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M) | + reg = (((abs_first << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M) | + ((abs_last << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M); wr32(hw, VPINT_ALLOC(vf->vf_id), reg); - reg = (((first << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M) | - ((last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) | + reg = (((abs_first << VPINT_ALLOC_PCI_FIRST_S) + & VPINT_ALLOC_PCI_FIRST_M) | + ((abs_last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M); wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg); /* map the interrupts to its functions */ @@ -656,8 +673,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) VPLAN_TX_QBASE_VFNUMQ_M)); wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg); } else { - dev_err(&pf->pdev->dev, - "Scattered mode for VF Tx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); } /* set regardless of mapping mode */ @@ -675,8 +691,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) VPLAN_RX_QBASE_VFNUMQ_M)); wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg); } else { - dev_err(&pf->pdev->dev, - "Scattered mode for VF Rx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); } } @@ -822,6 +837,7 @@ static int ice_check_avail_res(struct ice_pf *pf) { int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); u16 num_msix, num_txq, num_rxq, num_avail_msix; + struct device *dev = ice_pf_to_dev(pf); if (!pf->num_alloc_vfs || max_valid_res_idx < 0) return -EINVAL; @@ -854,8 +870,7 @@ static int ice_check_avail_res(struct ice_pf *pf) ICE_DFLT_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else { - dev_err(&pf->pdev->dev, - "Number of VFs %d exceeds max VF count %d\n", + dev_err(dev, "Number of VFs %d exceeds max VF count %d\n", pf->num_alloc_vfs, ICE_MAX_VF_COUNT); return -EIO; } @@ -870,11 +885,11 @@ static int ice_check_avail_res(struct ice_pf *pf) * at runtime through Virtchnl, that is the reason we start by reserving * few queues. */ - num_txq = ice_determine_res(pf, pf->q_left_tx, ICE_DFLT_QS_PER_VF, - ICE_MIN_QS_PER_VF); + num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf), + ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF); - num_rxq = ice_determine_res(pf, pf->q_left_rx, ICE_DFLT_QS_PER_VF, - ICE_MIN_QS_PER_VF); + num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf), + ICE_DFLT_QS_PER_VF, ICE_MIN_QS_PER_VF); if (!num_txq || !num_rxq) return -EIO; @@ -928,10 +943,17 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) /* reallocate VF resources to finish resetting the VSI state */ if (!ice_alloc_vf_res(vf)) { + struct ice_vsi *vsi; + ice_ena_vf_mappings(vf); set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); clear_bit(ICE_VF_STATE_DIS, vf->vf_states); - vf->num_vlan = 0; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (ice_vsi_add_vlan(vsi, 0)) + dev_warn(ice_pf_to_dev(pf), + "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest", + vf->vf_id); } /* Tell the VF driver the reset is done. This needs to be done only @@ -960,7 +982,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, struct ice_hw *hw; hw = &pf->hw; - if (vf->num_vlan) { + if (vsi->num_vlan) { status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m, rm_promisc); } else if (vf->port_vlan_id) { @@ -983,6 +1005,46 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, } /** + * ice_config_res_vfs - Finalize allocation of VFs resources in one go + * @pf: pointer to the PF structure + * + * This function is being called as last part of resetting all VFs, or when + * configuring VFs for the first time, where there is no resource to be freed + * Returns true if resources were properly allocated for all VFs, and false + * otherwise. + */ +static bool ice_config_res_vfs(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int v; + + if (ice_check_avail_res(pf)) { + dev_err(dev, "Cannot allocate VF resources, try with fewer number of VFs\n"); + return false; + } + + /* rearm global interrupts */ + if (test_and_clear_bit(__ICE_OICR_INTR_DIS, pf->state)) + ice_irq_dynamic_ena(hw, NULL, NULL); + + /* Finish resetting each VF and allocate resources */ + ice_for_each_vf(pf, v) { + struct ice_vf *vf = &pf->vf[v]; + + vf->num_vf_qs = pf->num_vf_qps; + dev_dbg(dev, "VF-id %d has %d queues configured\n", vf->vf_id, + vf->num_vf_qs); + ice_cleanup_and_realloc_vf(vf); + } + + ice_flush(hw); + clear_bit(__ICE_VF_DIS, pf->state); + + return true; +} + +/** * ice_reset_all_vfs - reset all allocated VFs in one go * @pf: pointer to the PF structure * @is_vflr: true if VFLR was issued, false if not @@ -996,6 +1058,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, */ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vf; int v, i; @@ -1009,19 +1072,18 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) - ice_trigger_vf_reset(&pf->vf[v], is_vflr); + ice_for_each_vf(pf, v) + ice_trigger_vf_reset(&pf->vf[v], is_vflr, true); - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { struct ice_vsi *vsi; vf = &pf->vf[v]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { - ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id); - ice_vsi_stop_rx_rings(vsi); - clear_bit(ICE_VF_STATE_ENA, vf->vf_states); - } + if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) + ice_dis_vf_qs(vf); + ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, + NULL, ICE_VF_RESET, vf->vf_id, NULL); } /* HW requires some time to make sure it can flush the FIFO for a VF @@ -1031,16 +1093,17 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * finished resetting. */ for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { - usleep_range(10000, 20000); - /* Check each VF in sequence */ while (v < pf->num_alloc_vfs) { u32 reg; vf = &pf->vf[v]; reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); - if (!(reg & VPGEN_VFRSTAT_VFRD_M)) + if (!(reg & VPGEN_VFRSTAT_VFRD_M)) { + /* only delay if the check failed */ + usleep_range(10, 20); break; + } /* If the current VF has finished resetting, move on * to the next VF in sequence. @@ -1053,11 +1116,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * time, but continue on with the operation. */ if (v < pf->num_alloc_vfs) - dev_warn(&pf->pdev->dev, "VF reset check timeout\n"); - usleep_range(10000, 20000); + dev_warn(dev, "VF reset check timeout\n"); /* free VF resources to begin resetting the VSI state */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { vf = &pf->vf[v]; ice_free_vf_res(vf); @@ -1071,30 +1133,31 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } if (ice_sriov_free_msix_res(pf)) - dev_err(&pf->pdev->dev, - "Failed to free MSIX resources used by SR-IOV\n"); + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - if (ice_check_avail_res(pf)) { - dev_err(&pf->pdev->dev, - "Cannot allocate VF resources, try with fewer number of VFs\n"); + if (!ice_config_res_vfs(pf)) return false; - } - - /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) { - vf = &pf->vf[v]; - vf->num_vf_qs = pf->num_vf_qps; - dev_dbg(&pf->pdev->dev, - "VF-id %d has %d queues configured\n", - vf->vf_id, vf->num_vf_qs); - ice_cleanup_and_realloc_vf(vf); - } + return true; +} - ice_flush(hw); - clear_bit(__ICE_VF_DIS, pf->state); +/** + * ice_is_vf_disabled + * @vf: pointer to the VF info + * + * Returns true if the PF or VF is disabled, false otherwise. + */ +static bool ice_is_vf_disabled(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; - return true; + /* If the PF has been disabled, there is no need resetting VF until + * PF is active again. Similarly, if the VF has been disabled, this + * means something else is resetting the VF, so we shouldn't continue. + * Otherwise, set disable VF state bit for actual reset, and continue. + */ + return (test_bit(__ICE_VF_DIS, pf->state) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)); } /** @@ -1108,33 +1171,35 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) { struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; struct ice_hw *hw; bool rsd = false; u8 promisc_m; u32 reg; int i; - /* If the VFs have been disabled, this means something else is - * resetting the VF, so we shouldn't continue. - */ - if (test_and_set_bit(__ICE_VF_DIS, pf->state)) - return false; + dev = ice_pf_to_dev(pf); - ice_trigger_vf_reset(vf, is_vflr); + if (ice_is_vf_disabled(vf)) { + dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", + vf->vf_id); + return true; + } + + /* Set VF disable bit state here, before triggering reset */ + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_trigger_vf_reset(vf, is_vflr, false); vsi = pf->vsi[vf->lan_vsi_idx]; - if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { - ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id); - ice_vsi_stop_rx_rings(vsi); - clear_bit(ICE_VF_STATE_ENA, vf->vf_states); - } else { - /* Call Disable LAN Tx queue AQ call even when queues are not - * enabled. This is needed for successful completiom of VFR - */ - ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, - NULL, ICE_VF_RESET, vf->vf_id, NULL); - } + if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) + ice_dis_vf_qs(vf); + + /* Call Disable LAN Tx queue AQ whether or not queues are + * enabled. This is needed for successful completion of VFR. + */ + ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, + NULL, ICE_VF_RESET, vf->vf_id, NULL); hw = &pf->hw; /* poll VPGEN_VFRSTAT reg to make sure @@ -1145,36 +1210,35 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) * poll the status register to make sure that the reset * completed successfully. */ - usleep_range(10000, 20000); reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); if (reg & VPGEN_VFRSTAT_VFRD_M) { rsd = true; break; } + + /* only sleep if the reset is not done */ + usleep_range(10, 20); } /* Display a warning if VF didn't manage to reset in time, but need to * continue on with the operation. */ if (!rsd) - dev_warn(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - vf->vf_id); - - usleep_range(10000, 20000); + dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id); /* disable promiscuous modes in case they were enabled * ignore any error if disabling process failed */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - if (vf->port_vlan_id || vf->num_vlan) + if (vf->port_vlan_id || vsi->num_vlan) promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_UCAST_PROMISC_BITS; vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true)) - dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n"); + dev_err(dev, "disabling promiscuous mode failed\n"); } /* free VF resources to begin resetting the VSI state */ @@ -1183,7 +1247,6 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) ice_cleanup_and_realloc_vf(vf); ice_flush(hw); - clear_bit(__ICE_VF_DIS, pf->state); return true; } @@ -1196,7 +1259,7 @@ void ice_vc_notify_link_state(struct ice_pf *pf) { int i; - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) ice_vc_notify_vf_link_state(&pf->vf[i]); } @@ -1226,19 +1289,26 @@ void ice_vc_notify_reset(struct ice_pf *pf) static void ice_vc_notify_vf_reset(struct ice_vf *vf) { struct virtchnl_pf_event pfe; + struct ice_pf *pf; - /* validate the request */ - if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) + if (!vf) return; - /* verify if the VF is in either init or active before proceeding */ - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && - !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + pf = vf->pf; + if (ice_validate_vf_id(pf, vf->vf_id)) + return; + + /* Bail out if VF is in disabled state, neither initialized, nor active + * state - otherwise proceed with notifications + */ + if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && + !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)) return; pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; - ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, + ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -1250,6 +1320,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf) */ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vfs; int i, ret; @@ -1257,7 +1328,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) /* Disable global interrupt 0 so we don't try to handle the VFLR. */ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); - + set_bit(__ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_alloc_vfs); @@ -1266,16 +1337,16 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) goto err_unroll_intr; } /* allocate memory */ - vfs = devm_kcalloc(&pf->pdev->dev, num_alloc_vfs, sizeof(*vfs), - GFP_KERNEL); + vfs = devm_kcalloc(dev, num_alloc_vfs, sizeof(*vfs), GFP_KERNEL); if (!vfs) { ret = -ENOMEM; goto err_pci_disable_sriov; } pf->vf = vfs; + pf->num_alloc_vfs = num_alloc_vfs; /* apply default profile */ - for (i = 0; i < num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { vfs[i].pf = pf; vfs[i].vf_sw_id = pf->first_sw; vfs[i].vf_id = i; @@ -1283,23 +1354,19 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) /* assign default capabilities */ set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps); vfs[i].spoofchk = true; - - /* Set this state so that PF driver does VF vector assignment */ - set_bit(ICE_VF_STATE_CFG_INTR, vfs[i].vf_states); } - pf->num_alloc_vfs = num_alloc_vfs; - /* VF resources get allocated during reset */ - if (!ice_reset_all_vfs(pf, true)) { + /* VF resources get allocated with initialization */ + if (!ice_config_res_vfs(pf)) { ret = -EIO; goto err_unroll_sriov; } - goto err_unroll_intr; + return ret; err_unroll_sriov: pf->vf = NULL; - devm_kfree(&pf->pdev->dev, vfs); + devm_kfree(dev, vfs); vfs = NULL; pf->num_alloc_vfs = 0; err_pci_disable_sriov: @@ -1307,6 +1374,7 @@ err_pci_disable_sriov: err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); + clear_bit(__ICE_OICR_INTR_DIS, pf->state); return ret; } @@ -1343,7 +1411,7 @@ static bool ice_pf_state_is_nominal(struct ice_pf *pf) static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { int pre_existing_vfs = pci_num_vf(pf->pdev); - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); int err; if (!ice_pf_state_is_nominal(pf)) { @@ -1353,7 +1421,7 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) { dev_err(dev, "This device is not capable of SR-IOV\n"); - return -ENODEV; + return -EOPNOTSUPP; } if (pre_existing_vfs && pre_existing_vfs != num_vfs) @@ -1388,6 +1456,12 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct ice_pf *pf = pci_get_drvdata(pdev); + struct device *dev = ice_pf_to_dev(pf); + + if (ice_is_safe_mode(pf)) { + dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n"); + return -EOPNOTSUPP; + } if (num_vfs) return ice_pci_sriov_ena(pf, num_vfs); @@ -1395,8 +1469,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); } else { - dev_err(&pf->pdev->dev, - "can't free VFs because some are assigned to VMs.\n"); + dev_err(dev, "can't free VFs because some are assigned to VMs.\n"); return -EBUSY; } @@ -1420,7 +1493,7 @@ void ice_process_vflr_event(struct ice_pf *pf) !pf->num_alloc_vfs) return; - for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) { + ice_for_each_vf(pf, vf_id) { struct ice_vf *vf = &pf->vf[vf_id]; u32 reg_idx, bit_idx; @@ -1435,12 +1508,10 @@ void ice_process_vflr_event(struct ice_pf *pf) } /** - * ice_vc_dis_vf - Disable a given VF via SW reset + * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF * @vf: pointer to the VF info - * - * Disable the VF through a SW reset */ -static void ice_vc_dis_vf(struct ice_vf *vf) +static void ice_vc_reset_vf(struct ice_vf *vf) { ice_vc_notify_vf_reset(vf); ice_reset_vf(vf, false); @@ -1461,24 +1532,27 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { enum ice_status aq_ret; + struct device *dev; struct ice_pf *pf; - /* validate the request */ - if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) + if (!vf) return -EINVAL; pf = vf->pf; + if (ice_validate_vf_id(pf, vf->vf_id)) + return -EINVAL; + + dev = ice_pf_to_dev(pf); /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_inval_msgs++; - dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n", - vf->vf_id, v_opcode, v_retval); + dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id, + v_opcode, v_retval); if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) { - dev_err(&pf->pdev->dev, - "Number of invalid messages exceeded for VF %d\n", + dev_err(dev, "Number of invalid messages exceeded for VF %d\n", vf->vf_id); - dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n"); + dev_err(dev, "Use PF Control I/F to enable the VF\n"); set_bit(ICE_VF_STATE_DIS, vf->vf_states); return -EIO; } @@ -1490,10 +1564,9 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); - if (aq_ret) { - dev_info(&pf->pdev->dev, - "Unable to send the message to VF %d aq_err %d\n", - vf->vf_id, pf->hw.mailboxq.sq_last_status); + if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) { + dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n", + vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status); return -EIO; } @@ -1539,14 +1612,14 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) int len = 0; int ret; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + if (ice_check_vf_init(pf, vf)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto err; } len = sizeof(struct virtchnl_vf_resource); - vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL); + vfres = kzalloc(len, GFP_KERNEL); if (!vfres) { v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; len = 0; @@ -1612,6 +1685,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->dflt_lan_addr.addr); + /* match guest capabilities */ + vf->driver_caps = vfres->vf_cap_flags; + set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); err: @@ -1619,7 +1695,7 @@ err: ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret, (u8 *)vfres, len); - devm_kfree(&pf->pdev->dev, vfres); + kfree(vfres); return ret; } @@ -1688,6 +1764,21 @@ static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) } /** + * ice_vc_isvalid_ring_len + * @ring_len: length of ring + * + * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE + * or zero + */ +static bool ice_vc_isvalid_ring_len(u16 ring_len) +{ + return ring_len == 0 || + (ring_len >= ICE_MIN_NUM_DESC && + ring_len <= ICE_MAX_NUM_DESC && + !(ring_len % ICE_REQ_DESC_MULTIPLE)); +} + +/** * ice_vc_config_rss_key * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1700,7 +1791,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi = NULL; + struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1712,18 +1803,18 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) { + if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1747,7 +1838,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi = NULL; + struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1759,18 +1850,18 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) { + if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) { + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1783,6 +1874,87 @@ error_param: } /** + * ice_set_vf_spoofchk + * @netdev: network interface device structure + * @vf_id: VF identifier + * @ena: flag to enable or disable feature + * + * Enable or disable VF spoof checking + */ +int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_vsi_ctx *ctx; + struct ice_vsi *vf_vsi; + enum ice_status status; + struct device *dev; + struct ice_vf *vf; + int ret = 0; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vf_vsi) { + netdev_err(netdev, "VSI %d for VF %d is null\n", + vf->lan_vsi_idx, vf->vf_id); + return -EINVAL; + } + + if (vf_vsi->type != ICE_VSI_VF) { + netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", + vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); + return -ENODEV; + } + + if (ena == vf->spoofchk) { + dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); + return 0; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vf_vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + if (ena) { + ctx->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctx->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } + + status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d", + ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status); + ret = -EIO; + goto out; + } + + /* only update spoofchk state and VSI context on success */ + vf_vsi->info.sec_flags = ctx->info.sec_flags; + vf->spoofchk = ena; + +out: + kfree(ctx); + return ret; +} + +/** * ice_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1794,8 +1966,8 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; + struct ice_eth_stats stats = { 0 }; struct ice_pf *pf = vf->pf; - struct ice_eth_stats stats; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -1814,7 +1986,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - memset(&stats, 0, sizeof(struct ice_eth_stats)); ice_update_eth_stats(vsi); stats = vsi->eth_stats; @@ -1839,6 +2010,8 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) (struct virtchnl_queue_select *)msg; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1855,6 +2028,12 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } + if (vqs->rx_queues > ICE_MAX_BASE_QS_PER_VF || + vqs->tx_queues > ICE_MAX_BASE_QS_PER_VF) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1865,12 +2044,47 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) * Tx queue group list was configured and the context bits were * programmed using ice_vsi_cfg_txqs */ - if (ice_vsi_start_rx_rings(vsi)) - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + q_map = vqs->rx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + set_bit(vf_q_id, vf->rxq_ena); + vf->num_qs_ena++; + } + + vsi = pf->vsi[vf->lan_vsi_idx]; + q_map = vqs->tx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->txq_ena)) + continue; + + set_bit(vf_q_id, vf->txq_ena); + vf->num_qs_ena++; + } /* Set flag to indicate that queues are enabled */ if (v_ret == VIRTCHNL_STATUS_SUCCESS) - set_bit(ICE_VF_STATE_ENA, vf->vf_states); + set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); error_param: /* send the response to the VF */ @@ -1893,9 +2107,11 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) (struct virtchnl_queue_select *)msg; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && - !test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { + !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1910,29 +2126,79 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } + if (vqs->rx_queues > ICE_MAX_BASE_QS_PER_VF || + vqs->tx_queues > ICE_MAX_BASE_QS_PER_VF) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) { - dev_err(&vsi->back->pdev->dev, - "Failed to stop tx rings on VSI %d\n", - vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (vqs->tx_queues) { + q_map = vqs->tx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) { + struct ice_ring *ring = vsi->tx_rings[vf_q_id]; + struct ice_txq_meta txq_meta = { 0 }; + + if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if not enabled */ + if (!test_bit(vf_q_id, vf->txq_ena)) + continue; + + ice_fill_txq_meta(vsi, ring, &txq_meta); + + if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, + ring, &txq_meta)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Clear enabled queues flag */ + clear_bit(vf_q_id, vf->txq_ena); + vf->num_qs_ena--; + } } - if (ice_vsi_stop_rx_rings(vsi)) { - dev_err(&vsi->back->pdev->dev, - "Failed to stop rx rings on VSI %d\n", - vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (vqs->rx_queues) { + q_map = vqs->rx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if not enabled */ + if (!test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Clear enabled queues flag */ + clear_bit(vf_q_id, vf->rxq_ena); + vf->num_qs_ena--; + } } /* Clear enabled queues flag */ - if (v_ret == VIRTCHNL_STATUS_SUCCESS) - clear_bit(ICE_VF_STATE_ENA, vf->vf_states); + if (v_ret == VIRTCHNL_STATUS_SUCCESS && !vf->num_qs_ena) + clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); error_param: /* send the response to the VF */ @@ -1962,12 +2228,6 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) irqmap_info = (struct virtchnl_irq_map_info *)msg; num_q_vectors_mapped = irqmap_info->num_vectors; - vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - /* Check to make sure number of VF vectors mapped is not greater than * number of VF vectors originally allocated, and check that * there is actually at least a single VF queue vector mapped @@ -1979,6 +2239,12 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) goto error_param; } + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + for (i = 0; i < num_q_vectors_mapped; i++) { struct ice_q_vector *q_vector; @@ -1986,9 +2252,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) vector_id = map->vector_id; vsi_id = map->vsi_id; - /* validate msg params */ - if (!(vector_id < pf->hw.func_caps.common_cap - .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || + /* vector_id is always 0-based for each VF, and can never be + * larger than or equal to the max allowed interrupts per VF + */ + if (!(vector_id < ICE_MAX_INTR_PER_VF) || + !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2056,6 +2324,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; + u16 num_rxq = 0, num_txq = 0; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; int i; @@ -2071,13 +2340,15 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; + } - if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) { - dev_err(&pf->pdev->dev, - "VF-%d requesting more than supported number of queues: %d\n", - vf->vf_id, qci->num_queue_pairs); + if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF || + qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -2087,37 +2358,52 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qpi->txq.vsi_id != qci->vsi_id || qpi->rxq.vsi_id != qci->vsi_id || qpi->rxq.queue_id != qpi->txq.queue_id || + qpi->txq.headwb_enabled || + !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || + !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } /* copy Tx queue info from VF into VSI */ - vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; - vsi->tx_rings[i]->count = qpi->txq.ring_len; - /* copy Rx queue info from VF into VSI */ - vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; - vsi->rx_rings[i]->count = qpi->rxq.ring_len; - if (qpi->rxq.databuffer_size > ((16 * 1024) - 128)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; + if (qpi->txq.ring_len > 0) { + num_txq++; + vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; + vsi->tx_rings[i]->count = qpi->txq.ring_len; } - vsi->rx_buf_len = qpi->rxq.databuffer_size; - if (qpi->rxq.max_pkt_size >= (16 * 1024) || - qpi->rxq.max_pkt_size < 64) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; + + /* copy Rx queue info from VF into VSI */ + if (qpi->rxq.ring_len > 0) { + num_rxq++; + vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; + vsi->rx_rings[i]->count = qpi->rxq.ring_len; + + if (qpi->rxq.databuffer_size != 0 && + (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || + qpi->rxq.databuffer_size < 1024)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi->rx_buf_len = qpi->rxq.databuffer_size; + vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len; + if (qpi->rxq.max_pkt_size >= (16 * 1024) || + qpi->rxq.max_pkt_size < 64) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } } + vsi->max_frame = qpi->rxq.max_pkt_size; } /* VF can request to configure less than allocated queues * or default allocated queues. So update the VSI with new number */ - vsi->num_txq = qci->num_queue_pairs; - vsi->num_rxq = qci->num_queue_pairs; + vsi->num_txq = num_txq; + vsi->num_rxq = num_rxq; /* All queues of VF VSI are in TC 0 */ - vsi->tc_cfg.tc_info[0].qcount_tx = qci->num_queue_pairs; - vsi->tc_cfg.tc_info[0].qcount_rx = qci->num_queue_pairs; + vsi->tc_cfg.tc_info[0].qcount_tx = num_txq; + vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq; if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi)) v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; @@ -2156,6 +2442,83 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) } /** + * ice_vc_add_mac_addr - attempt to add the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to add + */ +static int +ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + /* default unicast MAC already added */ + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { + dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + return -EPERM; + } + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true); + if (status == ICE_ERR_ALREADY_EXISTS) { + dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, + vf->vf_id); + return -EEXIST; + } else if (status) { + dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + /* only set dflt_lan_addr once */ + if (is_zero_ether_addr(vf->dflt_lan_addr.addr) && + is_unicast_ether_addr(mac_addr)) + ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); + + vf->num_mac++; + + return 0; +} + +/** + * ice_vc_del_mac_addr - attempt to delete the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to delete + */ +static int +ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + if (!ice_can_vf_change_mac(vf) && + ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false); + if (status == ICE_ERR_DOES_NOT_EXIST) { + dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr, + vf->vf_id); + return -ENOENT; + } else if (status) { + dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + eth_zero_addr(vf->dflt_lan_addr.addr); + + vf->num_mac--; + + return 0; +} + +/** * ice_vc_handle_mac_addr_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2166,20 +2529,23 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) static int ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) { + int (*ice_vc_cfg_mac) + (struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr); enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; struct ice_pf *pf = vf->pf; enum virtchnl_ops vc_op; - LIST_HEAD(mac_list); struct ice_vsi *vsi; - int mac_count = 0; int i; - if (set) + if (set) { vc_op = VIRTCHNL_OP_ADD_ETH_ADDR; - else + ice_vc_cfg_mac = ice_vc_add_mac_addr; + } else { vc_op = VIRTCHNL_OP_DEL_ETH_ADDR; + ice_vc_cfg_mac = ice_vc_del_mac_addr; + } if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) { @@ -2187,14 +2553,14 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) goto handle_mac_exit; } + /* If this VF is not privileged, then we can't add more than a + * limited number of addresses. Check to make sure that the + * additions do not push us over the limit. + */ if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { - dev_err(&pf->pdev->dev, - "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", + dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", vf->vf_id); - /* There is no need to let VF know about not being trusted - * to add more MAC addr, so we can just return success message. - */ v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } @@ -2206,72 +2572,23 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) } for (i = 0; i < al->num_elements; i++) { - u8 *maddr = al->list[i].addr; + u8 *mac_addr = al->list[i].addr; + int result; - if (ether_addr_equal(maddr, vf->dflt_lan_addr.addr) || - is_broadcast_ether_addr(maddr)) { - if (set) { - /* VF is trying to add filters that the PF - * already added. Just continue. - */ - dev_info(&pf->pdev->dev, - "MAC %pM already set for VF %d\n", - maddr, vf->vf_id); - continue; - } else { - /* VF can't remove dflt_lan_addr/bcast MAC */ - dev_err(&pf->pdev->dev, - "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n", - maddr, vf->vf_id); - continue; - } - } - - /* check for the invalid cases and bail if necessary */ - if (is_zero_ether_addr(maddr)) { - dev_err(&pf->pdev->dev, - "invalid MAC %pM provided for VF %d\n", - maddr, vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } - - if (is_unicast_ether_addr(maddr) && - !ice_can_vf_change_mac(vf)) { - dev_err(&pf->pdev->dev, - "can't change unicast MAC for untrusted VF %d\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } + if (is_broadcast_ether_addr(mac_addr) || + is_zero_ether_addr(mac_addr)) + continue; - /* get here if maddr is multicast or if VF can change MAC */ - if (ice_add_mac_to_list(vsi, &mac_list, al->list[i].addr)) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + result = ice_vc_cfg_mac(vf, vsi, mac_addr); + if (result == -EEXIST || result == -ENOENT) { + continue; + } else if (result) { + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; goto handle_mac_exit; } - mac_count++; - } - - /* program the updated filter list */ - if (set) - v_ret = ice_err_to_virt_err(ice_add_mac(&pf->hw, &mac_list)); - else - v_ret = ice_err_to_virt_err(ice_remove_mac(&pf->hw, &mac_list)); - - if (v_ret) { - dev_err(&pf->pdev->dev, - "can't update MAC filters for VF %d, error %d\n", - vf->vf_id, v_ret); - } else { - if (set) - vf->num_mac += mac_count; - else - vf->num_mac -= mac_count; } handle_mac_exit: - ice_free_fltr_list(&pf->pdev->dev, &mac_list); /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0); } @@ -2315,41 +2632,41 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vf_res_request *vfres = (struct virtchnl_vf_res_request *)msg; - int req_queues = vfres->num_queue_pairs; + u16 req_queues = vfres->num_queue_pairs; struct ice_pf *pf = vf->pf; - int max_allowed_vf_queues; - int tx_rx_queue_left; - int cur_queues; + u16 max_allowed_vf_queues; + u16 tx_rx_queue_left; + struct device *dev; + u16 cur_queues; + dev = ice_pf_to_dev(pf); if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } cur_queues = vf->num_vf_qs; - tx_rx_queue_left = min_t(int, pf->q_left_tx, pf->q_left_rx); + tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf), + ice_get_avail_rxq_count(pf)); max_allowed_vf_queues = tx_rx_queue_left + cur_queues; - if (req_queues <= 0) { - dev_err(&pf->pdev->dev, - "VF %d tried to request %d queues. Ignoring.\n", - vf->vf_id, req_queues); + if (!req_queues) { + dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n", + vf->vf_id); } else if (req_queues > ICE_MAX_BASE_QS_PER_VF) { - dev_err(&pf->pdev->dev, - "VF %d tried to request more than %d queues.\n", + dev_err(dev, "VF %d tried to request more than %d queues.\n", vf->vf_id, ICE_MAX_BASE_QS_PER_VF); vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF; - } else if (req_queues - cur_queues > tx_rx_queue_left) { - dev_warn(&pf->pdev->dev, - "VF %d requested %d more queues, but only %d left.\n", + } else if (req_queues > cur_queues && + req_queues - cur_queues > tx_rx_queue_left) { + dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); - vfres->num_queue_pairs = min_t(int, max_allowed_vf_queues, + vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, ICE_MAX_BASE_QS_PER_VF); } else { /* request is successful, then reset VF */ vf->num_req_qs = req_queues; - ice_vc_dis_vf(vf); - dev_info(&pf->pdev->dev, - "VF %d granted request of %d queues.\n", + ice_vc_reset_vf(vf); + dev_info(dev, "VF %d granted request of %u queues.\n", vf->vf_id, req_queues); return 0; } @@ -2375,39 +2692,34 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << ICE_VLAN_PRIORITY_S); - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vsi *vsi; + struct device *dev; struct ice_vf *vf; int ret = 0; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id); + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } if (vlan_id > ICE_MAX_VLANID || qos > 7) { - dev_err(&pf->pdev->dev, "Invalid VF Parameters\n"); + dev_err(dev, "Invalid VF Parameters\n"); return -EINVAL; } if (vlan_proto != htons(ETH_P_8021Q)) { - dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + dev_err(dev, "VF VLAN protocol is not supported\n"); return -EPROTONOSUPPORT; } vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } if (le16_to_cpu(vsi->info.pvid) == vlanprio) { /* duplicate request, so just return success */ - dev_info(&pf->pdev->dev, - "Duplicate pvid %d request\n", vlanprio); + dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); return ret; } @@ -2426,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, } if (vlan_id) { - dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n", + dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n", vlan_id, qos, vf_id); /* add new VLAN filter for each MAC */ @@ -2445,6 +2757,17 @@ error_set_pvid: } /** + * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads + * @caps: VF driver negotiated capabilities + * + * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false + */ +static bool ice_vf_vlan_offload_ena(u32 caps) +{ + return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN); +} + +/** * ice_vc_process_vlan_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2460,37 +2783,33 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) struct ice_pf *pf = vf->pf; bool vlan_promisc = false; struct ice_vsi *vsi; + struct device *dev; struct ice_hw *hw; int status = 0; u8 promisc_m; int i; + dev = ice_pf_to_dev(pf); if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (add_v && !ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(&pf->pdev->dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", - vf->vf_id); - /* There is no need to let VF know about being not trusted, - * so we can just return success message here - */ + if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, - "invalid VF VLAN id %d\n", vfl->vlan_id[i]); + dev_err(dev, "invalid VF VLAN id %d\n", + vfl->vlan_id[i]); goto error_param; } } @@ -2502,15 +2821,17 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (vsi->info.pvid) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; + if (add_v && !ice_is_vf_trusted(vf) && + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + vf->vf_id); + /* There is no need to let VF know about being not trusted, + * so we can just return success message here + */ goto error_param; } - if (ice_vsi_manage_vlan_stripping(vsi, add_v)) { - dev_err(&pf->pdev->dev, - "%sable VLAN stripping failed for VSI %i\n", - add_v ? "en" : "dis", vsi->vsi_num); + if (vsi->info.pvid) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -2524,9 +2845,8 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) u16 vid = vfl->vlan_id[i]; if (!ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(&pf->pdev->dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being * not trusted, so we can just return success @@ -2535,19 +2855,26 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (ice_vsi_add_vlan(vsi, vid)) { + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't need to add it again here + */ + if (!vid) + continue; + + status = ice_vsi_add_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan++; + vsi->num_vlan++; /* Enable VLAN pruning when VLAN is added */ if (!vlan_promisc) { status = ice_cfg_vlan_pruning(vsi, true, false); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, - "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", + dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", vid, status); goto error_param; } @@ -2560,8 +2887,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) promisc_m, vid); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, - "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", + dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", vid, status); } } @@ -2576,21 +2902,30 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) */ int num_vf_vlan; - num_vf_vlan = vf->num_vlan; + num_vf_vlan = vsi->num_vlan; for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) { u16 vid = vfl->vlan_id[i]; + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't want a VIRTCHNL request to remove it + */ + if (!vid) + continue; + /* Make sure ice_vsi_kill_vlan is successful before * updating VLAN information */ - if (ice_vsi_kill_vlan(vsi, vid)) { + status = ice_vsi_kill_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan--; - /* Disable VLAN pruning when removing VLAN */ - ice_cfg_vlan_pruning(vsi, false, false); + vsi->num_vlan--; + /* Disable VLAN pruning when the last VLAN is removed */ + if (!vsi->num_vlan) + ice_cfg_vlan_pruning(vsi, false, false); /* Disable Unicast/Multicast VLAN promiscuous mode */ if (vlan_promisc) { @@ -2654,6 +2989,11 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) goto error_param; } + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vsi_manage_vlan_stripping(vsi, true)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2680,6 +3020,11 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) goto error_param; } + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2695,6 +3040,33 @@ error_param: } /** + * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization + * @vf: VF to enable/disable VLAN stripping for on initialization + * + * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if + * the flag is cleared then we want to disable stripping. For example, the flag + * will be cleared when port VLANs are configured by the administrator before + * passing the VF to the guest or if the AVF driver doesn't support VLAN + * offloads. + */ +static int ice_vf_init_vlan_stripping(struct ice_vf *vf) +{ + struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + + if (!vsi) + return -EINVAL; + + /* don't modify stripping if port VLAN is configured */ + if (vsi->info.pvid) + return 0; + + if (ice_vf_vlan_offload_ena(vf->driver_caps)) + return ice_vsi_manage_vlan_stripping(vsi, true); + else + return ice_vsi_manage_vlan_stripping(vsi, false); +} + +/** * ice_vc_process_vf_msg - Process request from VF * @pf: pointer to the PF structure * @event: pointer to the AQ event @@ -2709,9 +3081,11 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) u16 msglen = event->msg_len; u8 *msg = event->msg_buf; struct ice_vf *vf = NULL; + struct device *dev; int err = 0; - if (vf_id >= pf->num_alloc_vfs) { + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) { err = -EINVAL; goto error_handler; } @@ -2731,27 +3105,13 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) err = -EPERM; else err = -EINVAL; - goto error_handler; - } - - /* Perform additional checks specific to RSS and Virtchnl */ - if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) { - struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - - if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) - err = -EINVAL; - } else if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) { - struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - - if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) - err = -EINVAL; } error_handler: if (err) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); - dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", + dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); return; } @@ -2762,6 +3122,10 @@ error_handler: break; case VIRTCHNL_OP_GET_VF_RESOURCES: err = ice_vc_get_vf_res_msg(vf, msg); + if (ice_vf_init_vlan_stripping(vf)) + dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n", + vf->vf_id); + ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: ice_vc_reset_vf_msg(vf); @@ -2811,8 +3175,8 @@ error_handler: break; case VIRTCHNL_OP_UNKNOWN: default: - dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", - v_opcode, vf_id); + dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, + vf_id); err = ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); @@ -2822,8 +3186,7 @@ error_handler: /* Helper function cares less about error return values here * as it is busy with pending work. */ - dev_info(&pf->pdev->dev, - "PF failed to honor VF %d, opcode %d, error %d\n", + dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", vf_id, v_opcode, err); } } @@ -2839,24 +3202,18 @@ error_handler: int ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; struct ice_vf *vf; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } ivi->vf = vf_id; ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr); @@ -2880,66 +3237,20 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) } /** - * ice_set_vf_spoofchk - * @netdev: network interface device structure - * @vf_id: VF identifier - * @ena: flag to enable or disable feature + * ice_wait_on_vf_reset + * @vf: The VF being resseting * - * Enable or disable VF spoof checking + * Poll to make sure a given VF is ready after reset */ -int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) +static void ice_wait_on_vf_reset(struct ice_vf *vf) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - struct ice_vsi_ctx *ctx; - enum ice_status status; - struct ice_vf *vf; - int ret = 0; - - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); - return -EINVAL; - } - - vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id); - return -EBUSY; - } - - if (ena == vf->spoofchk) { - dev_dbg(&pf->pdev->dev, "VF spoofchk already %s\n", - ena ? "ON" : "OFF"); - return 0; - } - - ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - - if (ena) { - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; - ctx->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M; - } + int i; - status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); - if (status) { - dev_dbg(&pf->pdev->dev, - "Error %d, failed to update VSI* parameters\n", status); - ret = -EIO; - goto out; + for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(20); } - - vf->spoofchk = ena; - vsi->info.sec_flags = ctx->info.sec_flags; - vsi->info.sw_flags2 = ctx->info.sw_flags2; -out: - devm_kfree(&pf->pdev->dev, ctx); - return ret; } /** @@ -2952,23 +3263,25 @@ out: */ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; int ret = 0; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id); + /* Don't set MAC on disabled VF */ + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + /* In case VF is in reset mode, wait until it is completed. Depending + * on factors like queue disabling routine, this could take ~250ms + */ + ice_wait_on_vf_reset(vf); + + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); @@ -2982,11 +3295,10 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) */ ether_addr_copy(vf->dflt_lan_addr.addr, mac); vf->pf_set_mac = true; - netdev_info(netdev, - "MAC on VF %d set to %pM. VF driver will be reinitialized\n", + netdev_info(netdev, "MAC on VF %d set to %pM. VF driver will be reinitialized\n", vf_id, mac); - ice_vc_dis_vf(vf); + ice_vc_reset_vf(vf); return ret; } @@ -3000,30 +3312,32 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) */ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id); + /* Don't set Trusted Mode on disabled VF */ + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + /* In case VF is in reset mode, wait until it is completed. Depending + * on factors like queue disabling routine, this could take ~250ms + */ + ice_wait_on_vf_reset(vf); + + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } /* Check if already trusted */ if (trusted == vf->trusted) return 0; vf->trusted = trusted; - ice_vc_dis_vf(vf); - dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", + ice_vc_reset_vf(vf); + dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", vf_id, trusted ? "" : "un"); return 0; @@ -3039,34 +3353,19 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) */ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; - struct virtchnl_pf_event pfe = { 0 }; - struct ice_link_status *ls; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; - struct ice_hw *hw; - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - hw = &pf->hw; - ls = &pf->hw.port_info->phy.link_info; - - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "vf %d in reset. Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } - - pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; - pfe.severity = PF_EVENT_SEVERITY_INFO; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; - vf->link_up = ls->link_info & ICE_AQ_LINK_UP; break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; @@ -3080,15 +3379,52 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return -EINVAL; } - if (vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up); + ice_vc_notify_vf_link_state(vf); - /* Notify the VF of its new link state */ - ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, - VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, - sizeof(pfe), NULL); + return 0; +} + +/** + * ice_get_vf_stats - populate some stats for the VF + * @netdev: the netdev of the PF + * @vf_id: the host OS identifier (0-255) + * @vf_stats: pointer to the OS memory to be initialized + */ +int ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_eth_stats *stats; + struct ice_vsi *vsi; + struct ice_vf *vf; + + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) + return -EINVAL; + + ice_update_eth_stats(vsi); + stats = &vsi->eth_stats; + + memset(vf_stats, 0, sizeof(*vf_stats)); + + vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast + + stats->rx_multicast; + vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast + + stats->tx_multicast; + vf_stats->rx_bytes = stats->rx_bytes; + vf_stats->tx_bytes = stats->tx_bytes; + vf_stats->broadcast = stats->rx_broadcast; + vf_stats->multicast = stats->rx_multicast; + vf_stats->rx_dropped = stats->rx_discards; + vf_stats->tx_dropped = stats->tx_discards; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index c3ca522c245a..4647d636ed36 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -15,26 +15,42 @@ #define ICE_MAX_MACADDR_PER_VF 12 /* Malicious Driver Detection */ -#define ICE_DFLT_NUM_MDD_EVENTS_ALLOWED 3 #define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED 10 +#define ICE_MDD_EVENTS_THRESHOLD 30 /* Static VF transaction/status register def */ #define VF_DEVICE_STATUS 0xAA #define VF_TRANS_PENDING_M 0x20 +/* wait defines for polling PF_PCI_CIAD register status */ +#define ICE_PCI_CIAD_WAIT_COUNT 100 +#define ICE_PCI_CIAD_WAIT_DELAY_US 1 + +/* VF resources default values and limitation */ +#define ICE_MAX_VF_COUNT 256 +#define ICE_MAX_QS_PER_VF 256 +#define ICE_MIN_QS_PER_VF 1 +#define ICE_DFLT_QS_PER_VF 4 +#define ICE_NONQ_VECS_VF 1 +#define ICE_MAX_SCATTER_QS_PER_VF 16 +#define ICE_MAX_BASE_QS_PER_VF 16 +#define ICE_MAX_INTR_PER_VF 65 +#define ICE_MAX_POLICY_INTR_PER_VF 33 +#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) +#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) +#define ICE_MAX_VF_RESET_WAIT 15 + +#define ice_for_each_vf(pf, i) \ + for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) + /* Specific VF states */ enum ice_vf_states { - ICE_VF_STATE_INIT = 0, - ICE_VF_STATE_ACTIVE, - ICE_VF_STATE_ENA, + ICE_VF_STATE_INIT = 0, /* PF is initializing VF */ + ICE_VF_STATE_ACTIVE, /* VF resources are allocated for use */ + ICE_VF_STATE_QS_ENA, /* VF queue(s) enabled */ ICE_VF_STATE_DIS, ICE_VF_STATE_MC_PROMISC, ICE_VF_STATE_UC_PROMISC, - /* state to indicate if PF needs to do vector assignment for VF. - * This needs to be set during first time VF initialization or later - * when VF asks for more Vectors through virtchnl OP. - */ - ICE_VF_STATE_CFG_INTR, ICE_VF_STATES_NBITS }; @@ -50,11 +66,14 @@ struct ice_vf { s16 vf_id; /* VF ID in the PF space */ u16 lan_vsi_idx; /* index into PF struct */ - int first_vector_idx; /* first vector index of this VF */ + /* first vector index of this VF in the PF space */ + int first_vector_idx; struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; u32 driver_caps; /* reported by VF driver */ struct virtchnl_ether_addr dflt_lan_addr; + DECLARE_BITMAP(txq_ena, ICE_MAX_BASE_QS_PER_VF); + DECLARE_BITMAP(rxq_ena, ICE_MAX_BASE_QS_PER_VF); u16 port_vlan_id; u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ u8 trusted:1; @@ -75,8 +94,8 @@ struct ice_vf { unsigned long vf_caps; /* VF's adv. capabilities */ u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; - u16 num_vlan; u16 num_vf_qs; /* num of queue configured per VF */ + u16 num_qs_ena; /* total num of Tx/Rx queue enabled */ }; #ifdef CONFIG_PCI_IOV @@ -103,12 +122,18 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); + +void ice_set_vf_state_qs_dis(struct ice_vf *vf); +int +ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats); #else /* CONFIG_PCI_IOV */ #define ice_process_vflr_event(pf) do {} while (0) #define ice_free_vfs(pf) do {} while (0) #define ice_vc_process_vf_msg(pf, event) do {} while (0) #define ice_vc_notify_link_state(pf) do {} while (0) #define ice_vc_notify_reset(pf) do {} while (0) +#define ice_set_vf_state_qs_dis(vf) do {} while (0) static inline bool ice_reset_all_vfs(struct ice_pf __always_unused *pf, @@ -174,5 +199,13 @@ ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, { return 0; } + +static inline int +ice_get_vf_stats(struct net_device __always_unused *netdev, + int __always_unused vf_id, + struct ifla_vf_stats __always_unused *vf_stats) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c new file mode 100644 index 000000000000..4d3407bbd4c4 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -0,0 +1,1182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include <linux/bpf_trace.h> +#include <net/xdp_sock.h> +#include <net/xdp.h> +#include "ice.h" +#include "ice_base.h" +#include "ice_type.h" +#include "ice_xsk.h" +#include "ice_txrx.h" +#include "ice_txrx_lib.h" +#include "ice_lib.h" + +/** + * ice_qp_reset_stats - Resets all stats for rings of given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) +{ + memset(&vsi->rx_rings[q_idx]->rx_stats, 0, + sizeof(vsi->rx_rings[q_idx]->rx_stats)); + memset(&vsi->tx_rings[q_idx]->stats, 0, + sizeof(vsi->tx_rings[q_idx]->stats)); + if (ice_is_xdp_ena_vsi(vsi)) + memset(&vsi->xdp_rings[q_idx]->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->stats)); +} + +/** + * ice_qp_clean_rings - Cleans all the rings of a given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) +{ + ice_clean_tx_ring(vsi->tx_rings[q_idx]); + if (ice_is_xdp_ena_vsi(vsi)) + ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + ice_clean_rx_ring(vsi->rx_rings[q_idx]); +} + +/** + * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector + * @vsi: VSI that has netdev + * @q_vector: q_vector that has NAPI context + * @enable: true for enable, false for disable + */ +static void +ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + bool enable) +{ + if (!vsi->netdev || !q_vector) + return; + + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); +} + +/** + * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring + * @vsi: the VSI that contains queue vector being un-configured + * @rx_ring: Rx ring that will have its IRQ disabled + * @q_vector: queue vector + */ +static void +ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring, + struct ice_q_vector *q_vector) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int base = vsi->base_vector; + u16 reg; + u32 val; + + /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle + * here only QINT_RQCTL + */ + reg = rx_ring->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + + if (q_vector) { + u16 v_idx = q_vector->v_idx; + + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0); + ice_flush(hw); + synchronize_irq(pf->msix_entries[v_idx + base].vector); + } +} + +/** + * ice_qvec_cfg_msix - Enable IRQ for given queue vector + * @vsi: the VSI that contains queue vector + * @q_vector: queue vector + */ +static void +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +{ + u16 reg_idx = q_vector->reg_idx; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + struct ice_ring *ring; + + ice_cfg_itr(hw, q_vector); + + wr32(hw, GLINT_RATE(reg_idx), + ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); + + ice_for_each_ring(ring, q_vector->tx) + ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, + q_vector->tx.itr_idx); + + ice_for_each_ring(ring, q_vector->rx) + ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx, + q_vector->rx.itr_idx); + + ice_flush(hw); +} + +/** + * ice_qvec_ena_irq - Enable IRQ for given queue vector + * @vsi: the VSI that contains queue vector + * @q_vector: queue vector + */ +static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + + ice_irq_dynamic_ena(hw, vsi, q_vector); + + ice_flush(hw); +} + +/** + * ice_qp_dis - Disables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_txq_meta txq_meta = { }; + struct ice_ring *tx_ring, *rx_ring; + struct ice_q_vector *q_vector; + int timeout = 50; + int err; + + if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) + return -EINVAL; + + tx_ring = vsi->tx_rings[q_idx]; + rx_ring = vsi->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + + ice_fill_txq_meta(vsi, tx_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); + if (err) + return err; + if (ice_is_xdp_ena_vsi(vsi)) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + memset(&txq_meta, 0, sizeof(txq_meta)); + ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, + &txq_meta); + if (err) + return err; + } + err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx); + if (err) + return err; + + ice_qvec_toggle_napi(vsi, q_vector, false); + ice_qp_clean_rings(vsi, q_idx); + ice_qp_reset_stats(vsi, q_idx); + + return 0; +} + +/** + * ice_qp_ena - Enables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + struct ice_ring *tx_ring, *rx_ring; + struct ice_q_vector *q_vector; + int err; + + if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) + return -EINVAL; + + qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + qg_buf->num_txqs = 1; + + tx_ring = vsi->tx_rings[q_idx]; + rx_ring = vsi->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf); + if (err) + goto free_buf; + + if (ice_is_xdp_ena_vsi(vsi)) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + memset(qg_buf, 0, sizeof(*qg_buf)); + qg_buf->num_txqs = 1; + err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf); + if (err) + goto free_buf; + ice_set_ring_xdp(xdp_ring); + xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + } + + err = ice_setup_rx_ctx(rx_ring); + if (err) + goto free_buf; + + ice_qvec_cfg_msix(vsi, q_vector); + + err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx); + if (err) + goto free_buf; + + clear_bit(__ICE_CFG_BUSY, vsi->state); + ice_qvec_toggle_napi(vsi, q_vector, true); + ice_qvec_ena_irq(vsi, q_vector); + + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); +free_buf: + kfree(qg_buf); + return err; +} + +/** + * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket + * @vsi: VSI to allocate the UMEM on + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_alloc_umems(struct ice_vsi *vsi) +{ + if (vsi->xsk_umems) + return 0; + + vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems), + GFP_KERNEL); + + if (!vsi->xsk_umems) { + vsi->num_xsk_umems = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * ice_xsk_add_umem - add a UMEM region for XDP sockets + * @vsi: VSI to which the UMEM will be added + * @umem: pointer to a requested UMEM region + * @qid: queue ID + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +{ + int err; + + err = ice_xsk_alloc_umems(vsi); + if (err) + return err; + + vsi->xsk_umems[qid] = umem; + vsi->num_xsk_umems_used++; + + return 0; +} + +/** + * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid + * @vsi: VSI from which the VSI will be removed + * @qid: Ring/qid associated with the UMEM + */ +static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid) +{ + vsi->xsk_umems[qid] = NULL; + vsi->num_xsk_umems_used--; + + if (vsi->num_xsk_umems_used == 0) { + kfree(vsi->xsk_umems); + vsi->xsk_umems = NULL; + vsi->num_xsk_umems = 0; + } +} + +/** + * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets + * @vsi: VSI to map the UMEM region + * @umem: UMEM to map + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + unsigned int i; + + dev = ice_pf_to_dev(pf); + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0, + PAGE_SIZE, + DMA_BIDIRECTIONAL, + ICE_RX_DMA_ATTR); + if (dma_mapping_error(dev, dma)) { + dev_dbg(dev, "XSK UMEM DMA mapping error on page num %d\n", + i); + goto out_unmap; + } + + umem->pages[i].dma = dma; + } + + return 0; + +out_unmap: + for (; i > 0; i--) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR); + umem->pages[i].dma = 0; + } + + return -EFAULT; +} + +/** + * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets + * @vsi: VSI from which the UMEM will be unmapped + * @umem: UMEM to unmap + */ +static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + unsigned int i; + + dev = ice_pf_to_dev(pf); + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR); + + umem->pages[i].dma = 0; + } +} + +/** + * ice_xsk_umem_disable - disable a UMEM region + * @vsi: Current VSI + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid) +{ + if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems || + !vsi->xsk_umems[qid]) + return -EINVAL; + + ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]); + ice_xsk_remove_umem(vsi, qid); + + return 0; +} + +/** + * ice_xsk_umem_enable - enable a UMEM region + * @vsi: Current VSI + * @umem: pointer to a requested UMEM region + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int +ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +{ + struct xdp_umem_fq_reuse *reuseq; + int err; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + if (!vsi->num_xsk_umems) + vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); + if (qid >= vsi->num_xsk_umems) + return -EINVAL; + + if (vsi->xsk_umems && vsi->xsk_umems[qid]) + return -EBUSY; + + reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count); + if (!reuseq) + return -ENOMEM; + + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + err = ice_xsk_umem_dma_map(vsi, umem); + if (err) + return err; + + err = ice_xsk_add_umem(vsi, umem, qid); + if (err) + return err; + + return 0; +} + +/** + * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state + * @vsi: Current VSI + * @umem: UMEM to enable/associate to a ring, NULL to disable + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +{ + bool if_running, umem_present = !!umem; + int ret = 0, umem_failure = 0; + + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + + if (if_running) { + ret = ice_qp_dis(vsi, qid); + if (ret) { + netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret); + goto xsk_umem_if_up; + } + } + + umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) : + ice_xsk_umem_disable(vsi, qid); + +xsk_umem_if_up: + if (if_running) { + ret = ice_qp_ena(vsi, qid); + if (!ret && umem_present) + napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + else if (ret) + netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret); + } + + if (umem_failure) { + netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d", + umem_present ? "en" : "dis", umem_failure); + return umem_failure; + } + + return ret; +} + +/** + * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations + * @zca: zero-cpoy allocator + * @handle: Buffer handle + */ +void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct ice_rx_buf *rx_buf; + struct ice_ring *rx_ring; + struct xdp_umem *umem; + u64 hr, mask; + u16 nta; + + rx_ring = container_of(zca, struct ice_ring, zca); + umem = rx_ring->xsk_umem; + hr = umem->headroom + XDP_PACKET_HEADROOM; + + mask = umem->chunk_mask; + + nta = rx_ring->next_to_alloc; + rx_buf = &rx_ring->rx_buf[nta]; + + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + handle &= mask; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += hr; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += hr; + + rx_buf->handle = (u64)handle + umem->headroom; +} + +/** + * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem + * @rx_ring: ring with an xdp_umem bound to it + * @rx_buf: buffer to which xsk page address will be assigned + * + * This function allocates an Rx buffer in the hot path. + * The buffer can come from fill queue or recycle queue. + * + * Returns true if an assignment was successful, false if not. + */ +static __always_inline bool +ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + void *addr = rx_buf->addr; + u64 handle, hr; + + if (addr) { + rx_ring->rx_stats.page_reuse_count++; + return true; + } + + if (!xsk_umem_peek_addr(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + hr = umem->headroom + XDP_PACKET_HEADROOM; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += hr; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += hr; + + rx_buf->handle = handle + umem->headroom; + + xsk_umem_release_addr(umem); + return true; +} + +/** + * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem + * @rx_ring: ring with an xdp_umem bound to it + * @rx_buf: buffer to which xsk page address will be assigned + * + * This function allocates an Rx buffer in the slow path. + * The buffer can come from fill queue or recycle queue. + * + * Returns true if an assignment was successful, false if not. + */ +static __always_inline bool +ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + u64 handle, headroom; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + handle &= umem->chunk_mask; + headroom = umem->headroom + XDP_PACKET_HEADROOM; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += headroom; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += headroom; + + rx_buf->handle = handle + umem->headroom; + + xsk_umem_release_addr_rq(umem); + return true; +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * @alloc: the function pointer to call for allocation + * + * This function allocates a number of Rx buffers from the fill ring + * or the internal recycle mechanism and places them on the Rx ring. + * + * Returns false if all allocations were successful, true if any fail. + */ +static bool +ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count, + bool alloc(struct ice_ring *, struct ice_rx_buf *)) +{ + union ice_32b_rx_flex_desc *rx_desc; + u16 ntu = rx_ring->next_to_use; + struct ice_rx_buf *rx_buf; + bool ret = false; + + if (!count) + return false; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + rx_buf = &rx_ring->rx_buf[ntu]; + + do { + if (!alloc(rx_ring, rx_buf)) { + ret = true; + break; + } + + dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0, + rx_ring->rx_buf_len, + DMA_BIDIRECTIONAL); + + rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma); + rx_desc->wb.status_error0 = 0; + + rx_desc++; + rx_buf++; + ntu++; + + if (unlikely(ntu == rx_ring->count)) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + rx_buf = rx_ring->rx_buf; + ntu = 0; + } + } while (--count); + + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + return ret; +} + +/** + * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path + * @rx_ring: Rx ring + * @count: number of bufs to allocate + * + * Returns false on success, true on failure. + */ +static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count) +{ + return ice_alloc_rx_bufs_zc(rx_ring, count, + ice_alloc_buf_fast_zc); +} + +/** + * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path + * @rx_ring: Rx ring + * @count: number of bufs to allocate + * + * Returns false on success, true on failure. + */ +bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count) +{ + return ice_alloc_rx_bufs_zc(rx_ring, count, + ice_alloc_buf_slow_zc); +} + +/** + * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring + * @rx_ring: Rx ring + */ +static void ice_bump_ntc(struct ice_ring *rx_ring) +{ + int ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + prefetch(ICE_RX_DESC(rx_ring, ntc)); +} + +/** + * ice_get_rx_buf_zc - Fetch the current Rx buffer + * @rx_ring: Rx ring + * @size: size of a buffer + * + * This function returns the current, received Rx buffer and does + * DMA synchronization. + * + * Returns a pointer to the received Rx buffer. + */ +static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size) +{ + struct ice_rx_buf *rx_buf; + + rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0, + size, DMA_BIDIRECTIONAL); + + return rx_buf; +} + +/** + * ice_reuse_rx_buf_zc - reuse an Rx buffer + * @rx_ring: Rx ring + * @old_buf: The buffer to recycle + * + * This function recycles a finished Rx buffer, and places it on the recycle + * queue (next_to_alloc). + */ +static void +ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) +{ + unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; + u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; + u16 nta = rx_ring->next_to_alloc; + struct ice_rx_buf *new_buf; + + new_buf = &rx_ring->rx_buf[nta++]; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + new_buf->dma = old_buf->dma & mask; + new_buf->dma += hr; + + new_buf->addr = (void *)((unsigned long)old_buf->addr & mask); + new_buf->addr += hr; + + new_buf->handle = old_buf->handle & mask; + new_buf->handle += rx_ring->xsk_umem->headroom; + + old_buf->addr = NULL; +} + +/** + * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer + * @rx_ring: Rx ring + * @rx_buf: zero-copy Rx buffer + * @xdp: XDP buffer + * + * This function allocates a new skb from a zero-copy Rx buffer. + * + * Returns the skb on success, NULL on failure. + */ +static struct sk_buff * +ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + unsigned int datasize_hard = xdp->data_end - + xdp->data_hard_start; + struct sk_buff *skb; + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + ice_reuse_rx_buf_zc(rx_ring, rx_buf); + + return skb; +} + +/** + * ice_run_xdp_zc - Executes an XDP program in zero-copy path + * @rx_ring: Rx ring + * @xdp: xdp_buff used as input to the XDP program + * + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} + */ +static int +ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) +{ + int err, result = ICE_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ice_ring *xdp_ring; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + return ICE_XDP_PASS; + } + + act = bpf_prog_run_xdp(xdp_prog, xdp); + xdp->handle += xdp->data - xdp->data_hard_start; + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; + result = ice_xmit_xdp_buff(xdp, xdp_ring); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough -- not supported action */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping frame */ + case XDP_DROP: + result = ICE_XDP_CONSUMED; + break; + } + + rcu_read_unlock(); + return result; +} + +/** + * ice_clean_rx_irq_zc - consumes packets from the hardware ring + * @rx_ring: AF_XDP Rx ring + * @budget: NAPI budget + * + * Returns number of processed packets on success, remaining budget on failure. + */ +int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + bool failure = 0; + + xdp.rxq = &rx_ring->xdp_rxq; + + while (likely(total_rx_packets < (unsigned int)budget)) { + union ice_32b_rx_flex_desc *rx_desc; + unsigned int size, xdp_res = 0; + struct ice_rx_buf *rx_buf; + struct sk_buff *skb; + u16 stat_err_bits; + u16 vlan_tag = 0; + u8 rx_ptype; + + if (cleaned_count >= ICE_RX_BUF_WRITE) { + failure |= ice_alloc_rx_bufs_fast_zc(rx_ring, + cleaned_count); + cleaned_count = 0; + } + + rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); + if (!ice_test_staterr(rx_desc, stat_err_bits)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. + */ + dma_rmb(); + + size = le16_to_cpu(rx_desc->wb.pkt_len) & + ICE_RX_FLX_DESC_PKT_LEN_M; + if (!size) + break; + + rx_buf = ice_get_rx_buf_zc(rx_ring, size); + if (!rx_buf->addr) + break; + + xdp.data = rx_buf->addr; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + xdp.handle = rx_buf->handle; + + xdp_res = ice_run_xdp_zc(rx_ring, &xdp); + if (xdp_res) { + if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { + xdp_xmit |= xdp_res; + rx_buf->addr = NULL; + } else { + ice_reuse_rx_buf_zc(rx_ring, rx_buf); + } + + total_rx_bytes += size; + total_rx_packets++; + cleaned_count++; + + ice_bump_ntc(rx_ring); + continue; + } + + /* XDP_PASS path */ + skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp); + if (!skb) { + rx_ring->rx_stats.alloc_buf_failed++; + break; + } + + cleaned_count++; + ice_bump_ntc(rx_ring); + + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + total_rx_bytes += skb->len; + total_rx_packets++; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); + if (ice_test_staterr(rx_desc, stat_err_bits)) + vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; + + ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + ice_receive_skb(rx_ring, skb, vlan_tag); + } + + ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); + + return failure ? budget : (int)total_rx_packets; +} + +/** + * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * @xdp_ring: XDP Tx ring + * @budget: max number of frames to xmit + * + * Returns true if cleanup/transmission is done. + */ +static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) +{ + struct ice_tx_desc *tx_desc = NULL; + bool work_done = true; + struct xdp_desc desc; + dma_addr_t dma; + + while (likely(budget-- > 0)) { + struct ice_tx_buf *tx_buf; + + if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + work_done = false; + break; + } + + tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; + + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) + break; + + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, + DMA_BIDIRECTIONAL); + + tx_buf->bytecount = desc.len; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, + 0, desc.len, 0); + + xdp_ring->next_to_use++; + if (xdp_ring->next_to_use == xdp_ring->count) + xdp_ring->next_to_use = 0; + } + + if (tx_desc) { + ice_xdp_ring_update_tail(xdp_ring); + xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + } + + return budget > 0 && work_done; +} + +/** + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer + * @xdp_ring: XDP Tx ring + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries + * @xdp_ring: XDP Tx ring + * @budget: NAPI budget + * + * Returns true if cleanup/tranmission is done. + */ +bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) +{ + int total_packets = 0, total_bytes = 0; + s16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + u32 xsk_frames = 0; + bool xmit_done; + + tx_desc = ICE_TX_DESC(xdp_ring, ntc); + tx_buf = &xdp_ring->tx_buf[ntc]; + ntc -= xdp_ring->count; + + do { + if (!(tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + break; + + total_bytes += tx_buf->bytecount; + total_packets++; + + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } + + tx_desc->cmd_type_offset_bsz = 0; + tx_buf++; + tx_desc++; + ntc++; + + if (unlikely(!ntc)) { + ntc -= xdp_ring->count; + tx_buf = xdp_ring->tx_buf; + tx_desc = ICE_TX_DESC(xdp_ring, 0); + } + + prefetch(tx_desc); + + } while (likely(--budget)); + + ntc += xdp_ring->count; + xdp_ring->next_to_clean = ntc; + + if (xsk_frames) + xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); + + ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); + xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); + + return budget > 0 && xmit_done; +} + +/** + * ice_xsk_wakeup - Implements ndo_xsk_wakeup + * @netdev: net_device + * @queue_id: queue to wake up + * @flags: ignored in our case, since we have Rx and Tx in the same NAPI + * + * Returns negative on error, zero otherwise. + */ +int +ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, + u32 __always_unused flags) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_q_vector *q_vector; + struct ice_vsi *vsi = np->vsi; + struct ice_ring *ring; + + if (test_bit(__ICE_DOWN, vsi->state)) + return -ENETDOWN; + + if (!ice_is_xdp_ena_vsi(vsi)) + return -ENXIO; + + if (queue_id >= vsi->num_txq) + return -ENXIO; + + if (!vsi->xdp_rings[queue_id]->xsk_umem) + return -ENXIO; + + ring = vsi->xdp_rings[queue_id]; + + /* The idea here is that if NAPI is running, mark a miss, so + * it will run again. If not, trigger an interrupt and + * schedule the NAPI from interrupt context. If NAPI would be + * scheduled here, the interrupt affinity would not be + * honored. + */ + q_vector = ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + ice_trigger_sw_intr(&vsi->back->hw, q_vector); + + return 0; +} + +/** + * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached + * @vsi: VSI to be checked + * + * Returns true if any of the Rx rings has an AF_XDP UMEM attached + */ +bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->xsk_umems) + return false; + + for (i = 0; i < vsi->num_xsk_umems; i++) { + if (vsi->xsk_umems[i]) + return true; + } + + return false; +} + +/** + * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring + * @rx_ring: ring to be cleaned + */ +void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) +{ + u16 i; + + for (i = 0; i < rx_ring->count; i++) { + struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + + if (!rx_buf->addr) + continue; + + xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle); + rx_buf->addr = NULL; + } +} + +/** + * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues + * @xdp_ring: XDP_Tx ring + */ +void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; + u32 xsk_frames = 0; + + while (ntc != ntu) { + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; + + if (tx_buf->raw_buf) + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + tx_buf->raw_buf = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + if (xsk_frames) + xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); +} diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h new file mode 100644 index 000000000000..3479e1de98fe --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_XSK_H_ +#define _ICE_XSK_H_ +#include "ice_txrx.h" +#include "ice.h" + +struct ice_vsi; + +#ifdef CONFIG_XDP_SOCKETS +int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid); +void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget); +bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget); +int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count); +bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); +void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring); +void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring); +#else +static inline int +ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi, + struct xdp_umem __always_unused *umem, + u16 __always_unused qid) +{ + return -ENOTSUPP; +} + +static inline void +ice_zca_free(struct zero_copy_allocator __always_unused *zca, + unsigned long __always_unused handle) +{ +} + +static inline int +ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring, + int __always_unused budget) +{ + return 0; +} + +static inline bool +ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring, + int __always_unused budget) +{ + return false; +} + +static inline bool +ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring, + u16 __always_unused count) +{ + return false; +} + +static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi) +{ + return false; +} + +static inline int +ice_xsk_wakeup(struct net_device __always_unused *netdev, + u32 __always_unused queue_id, u32 __always_unused flags) +{ + return -ENOTSUPP; +} + +#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0) +#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0) +#endif /* CONFIG_XDP_SOCKETS */ +#endif /* !_ICE_XSK_H_ */ |