diff options
Diffstat (limited to 'drivers/net/ethernet/emulex/benet/be_main.c')
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_main.c | 1200 |
1 files changed, 677 insertions, 523 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e6b790f0d9dc..fb0bc3c3620e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -30,6 +30,9 @@ MODULE_DESCRIPTION(DRV_DESC " " DRV_VER); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); +/* num_vfs module param is obsolete. + * Use sysfs method to enable/disable VFs. + */ static unsigned int num_vfs; module_param(num_vfs, uint, S_IRUGO); MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize"); @@ -727,48 +730,86 @@ static u16 skb_ip_proto(struct sk_buff *skb) ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; } -static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, - struct sk_buff *skb, u32 wrb_cnt, u32 len, - bool skip_hw_vlan) +static inline bool be_is_txq_full(struct be_tx_obj *txo) { - u16 vlan_tag, proto; + return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len; +} - memset(hdr, 0, sizeof(*hdr)); +static inline bool be_can_txq_wake(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) < txo->q.len / 2; +} - SET_TX_WRB_HDR_BITS(crc, hdr, 1); +static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) > txo->pend_wrb_cnt; +} + +static void be_get_wrb_params_from_skb(struct be_adapter *adapter, + struct sk_buff *skb, + struct be_wrb_params *wrb_params) +{ + u16 proto; if (skb_is_gso(skb)) { - SET_TX_WRB_HDR_BITS(lso, hdr, 1); - SET_TX_WRB_HDR_BITS(lso_mss, hdr, skb_shinfo(skb)->gso_size); + BE_WRB_F_SET(wrb_params->features, LSO, 1); + wrb_params->lso_mss = skb_shinfo(skb)->gso_size; if (skb_is_gso_v6(skb) && !lancer_chip(adapter)) - SET_TX_WRB_HDR_BITS(lso6, hdr, 1); + BE_WRB_F_SET(wrb_params->features, LSO6, 1); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->encapsulation) { - SET_TX_WRB_HDR_BITS(ipcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, IPCS, 1); proto = skb_inner_ip_proto(skb); } else { proto = skb_ip_proto(skb); } if (proto == IPPROTO_TCP) - SET_TX_WRB_HDR_BITS(tcpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, TCPCS, 1); else if (proto == IPPROTO_UDP) - SET_TX_WRB_HDR_BITS(udpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, UDPCS, 1); } if (skb_vlan_tag_present(skb)) { - SET_TX_WRB_HDR_BITS(vlan, hdr, 1); - vlan_tag = be_get_tx_vlan_tag(adapter, skb); - SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag); + BE_WRB_F_SET(wrb_params->features, VLAN, 1); + wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb); } - SET_TX_WRB_HDR_BITS(num_wrb, hdr, wrb_cnt); - SET_TX_WRB_HDR_BITS(len, hdr, len); + BE_WRB_F_SET(wrb_params->features, CRC, 1); +} - /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0 - * When this hack is not needed, the evt bit is set while ringing DB +static void wrb_fill_hdr(struct be_adapter *adapter, + struct be_eth_hdr_wrb *hdr, + struct be_wrb_params *wrb_params, + struct sk_buff *skb) +{ + memset(hdr, 0, sizeof(*hdr)); + + SET_TX_WRB_HDR_BITS(crc, hdr, + BE_WRB_F_GET(wrb_params->features, CRC)); + SET_TX_WRB_HDR_BITS(ipcs, hdr, + BE_WRB_F_GET(wrb_params->features, IPCS)); + SET_TX_WRB_HDR_BITS(tcpcs, hdr, + BE_WRB_F_GET(wrb_params->features, TCPCS)); + SET_TX_WRB_HDR_BITS(udpcs, hdr, + BE_WRB_F_GET(wrb_params->features, UDPCS)); + + SET_TX_WRB_HDR_BITS(lso, hdr, + BE_WRB_F_GET(wrb_params->features, LSO)); + SET_TX_WRB_HDR_BITS(lso6, hdr, + BE_WRB_F_GET(wrb_params->features, LSO6)); + SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss); + + /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this + * hack is not needed, the evt bit is set while ringing DB. */ - if (skip_hw_vlan) - SET_TX_WRB_HDR_BITS(event, hdr, 1); + SET_TX_WRB_HDR_BITS(event, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW)); + SET_TX_WRB_HDR_BITS(vlan, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN)); + SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag); + + SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb)); + SET_TX_WRB_HDR_BITS(len, hdr, skb->len); } static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, @@ -788,77 +829,124 @@ static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, } } -/* Returns the number of WRBs used up by the skb */ +/* Grab a WRB header for xmit */ +static u16 be_tx_get_wrb_hdr(struct be_tx_obj *txo) +{ + u16 head = txo->q.head; + + queue_head_inc(&txo->q); + return head; +} + +/* Set up the WRB header for xmit */ +static void be_tx_setup_wrb_hdr(struct be_adapter *adapter, + struct be_tx_obj *txo, + struct be_wrb_params *wrb_params, + struct sk_buff *skb, u16 head) +{ + u32 num_frags = skb_wrb_cnt(skb); + struct be_queue_info *txq = &txo->q; + struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head); + + wrb_fill_hdr(adapter, hdr, wrb_params, skb); + be_dws_cpu_to_le(hdr, sizeof(*hdr)); + + BUG_ON(txo->sent_skb_list[head]); + txo->sent_skb_list[head] = skb; + txo->last_req_hdr = head; + atomic_add(num_frags, &txq->used); + txo->last_req_wrb_cnt = num_frags; + txo->pend_wrb_cnt += num_frags; +} + +/* Setup a WRB fragment (buffer descriptor) for xmit */ +static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr, + int len) +{ + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + wrb = queue_head_node(txq); + wrb_fill(wrb, busaddr, len); + queue_head_inc(txq); +} + +/* Bring the queue back to the state it was in before be_xmit_enqueue() routine + * was invoked. The producer index is restored to the previous packet and the + * WRBs of the current packet are unmapped. Invoked to handle tx setup errors. + */ +static void be_xmit_restore(struct be_adapter *adapter, + struct be_tx_obj *txo, u16 head, bool map_single, + u32 copied) +{ + struct device *dev; + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + dev = &adapter->pdev->dev; + txq->head = head; + + /* skip the first wrb (hdr); it's not mapped */ + queue_head_inc(txq); + while (copied) { + wrb = queue_head_node(txq); + unmap_tx_frag(dev, wrb, map_single); + map_single = false; + copied -= le32_to_cpu(wrb->frag_len); + queue_head_inc(txq); + } + + txq->head = head; +} + +/* Enqueue the given packet for transmit. This routine allocates WRBs for the + * packet, dma maps the packet buffers and sets up the WRBs. Returns the number + * of WRBs used up by the packet. + */ static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo, - struct sk_buff *skb, bool skip_hw_vlan) + struct sk_buff *skb, + struct be_wrb_params *wrb_params) { u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb); struct device *dev = &adapter->pdev->dev; struct be_queue_info *txq = &txo->q; - struct be_eth_hdr_wrb *hdr; bool map_single = false; - struct be_eth_wrb *wrb; - dma_addr_t busaddr; u16 head = txq->head; + dma_addr_t busaddr; + int len; - hdr = queue_head_node(txq); - wrb_fill_hdr(adapter, hdr, skb, wrb_cnt, skb->len, skip_hw_vlan); - be_dws_cpu_to_le(hdr, sizeof(*hdr)); - - queue_head_inc(txq); + head = be_tx_get_wrb_hdr(txo); if (skb->len > skb->data_len) { - int len = skb_headlen(skb); + len = skb_headlen(skb); busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; map_single = true; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, len); - queue_head_inc(txq); + be_tx_setup_wrb_frag(txo, busaddr, len); copied += len; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + len = skb_frag_size(frag); - busaddr = skb_frag_dma_map(dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); + busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, skb_frag_size(frag)); - queue_head_inc(txq); - copied += skb_frag_size(frag); + be_tx_setup_wrb_frag(txo, busaddr, len); + copied += len; } - BUG_ON(txo->sent_skb_list[head]); - txo->sent_skb_list[head] = skb; - txo->last_req_hdr = head; - atomic_add(wrb_cnt, &txq->used); - txo->last_req_wrb_cnt = wrb_cnt; - txo->pend_wrb_cnt += wrb_cnt; + be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head); be_tx_stats_update(txo, skb); return wrb_cnt; dma_err: - /* Bring the queue back to the state it was in before this - * routine was invoked. - */ - txq->head = head; - /* skip the first wrb (hdr); it's not mapped */ - queue_head_inc(txq); - while (copied) { - wrb = queue_head_node(txq); - unmap_tx_frag(dev, wrb, map_single); - map_single = false; - copied -= le32_to_cpu(wrb->frag_len); - adapter->drv_stats.dma_map_errors++; - queue_head_inc(txq); - } - txq->head = head; + adapter->drv_stats.dma_map_errors++; + be_xmit_restore(adapter, txo, head, map_single, copied); return 0; } @@ -869,7 +957,8 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter) static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { u16 vlan_tag = 0; @@ -886,8 +975,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to * skip VLAN insertion */ - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } if (vlan_tag) { @@ -905,8 +993,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, vlan_tag); if (unlikely(!skb)) return skb; - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } return skb; @@ -946,7 +1033,8 @@ static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb) static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; unsigned int eth_hdr_len; @@ -970,7 +1058,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_pvid_tagging_enabled(adapter) && veh->h_vlan_proto == htons(ETH_P_8021Q)) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); /* HW has a bug wherein it will calculate CSUM for VLAN * pkts even though it is disabled. @@ -978,7 +1066,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_vlan_tag_present(skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1000,7 +1088,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_ipv6_tx_stall_chk(adapter, skb) && be_vlan_tag_tx_chk(adapter, skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1014,7 +1102,7 @@ err: static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params *wrb_params) { /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or * less may cause a transmit stall on that port. So the work-around is @@ -1026,7 +1114,7 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, } if (BEx_chip(adapter) || lancer_chip(adapter)) { - skb = be_lancer_xmit_workarounds(adapter, skb, skip_hw_vlan); + skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params); if (!skb) return NULL; } @@ -1060,24 +1148,26 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo) static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) { - bool skip_hw_vlan = false, flush = !skb->xmit_more; struct be_adapter *adapter = netdev_priv(netdev); u16 q_idx = skb_get_queue_mapping(skb); struct be_tx_obj *txo = &adapter->tx_obj[q_idx]; - struct be_queue_info *txq = &txo->q; + struct be_wrb_params wrb_params = { 0 }; + bool flush = !skb->xmit_more; u16 wrb_cnt; - skb = be_xmit_workarounds(adapter, skb, &skip_hw_vlan); + skb = be_xmit_workarounds(adapter, skb, &wrb_params); if (unlikely(!skb)) goto drop; - wrb_cnt = be_xmit_enqueue(adapter, txo, skb, skip_hw_vlan); + be_get_wrb_params_from_skb(adapter, skb, &wrb_params); + + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) { dev_kfree_skb_any(skb); goto drop; } - if ((atomic_read(&txq->used) + BE_MAX_TX_FRAG_COUNT) >= txq->len) { + if (be_is_txq_full(txo)) { netif_stop_subqueue(netdev, q_idx); tx_stats(txo)->tx_stops++; } @@ -2032,7 +2122,7 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) if (rxo->rx_post_starved) rxo->rx_post_starved = false; do { - notify = min(256u, posted); + notify = min(MAX_NUM_POST_ERX_DB, posted); be_rxq_notify(adapter, rxq->id, notify); posted -= notify; } while (posted); @@ -2042,18 +2132,23 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) } } -static struct be_eth_tx_compl *be_tx_compl_get(struct be_queue_info *tx_cq) +static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo) { - struct be_eth_tx_compl *txcp = queue_tail_node(tx_cq); + struct be_queue_info *tx_cq = &txo->cq; + struct be_tx_compl_info *txcp = &txo->txcp; + struct be_eth_tx_compl *compl = queue_tail_node(tx_cq); - if (txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) + if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) return NULL; + /* Ensure load ordering of valid bit dword and other dwords below */ rmb(); - be_dws_le_to_cpu(txcp, sizeof(*txcp)); + be_dws_le_to_cpu(compl, sizeof(*compl)); - txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; + txcp->status = GET_TX_COMPL_BITS(status, compl); + txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl); + compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; queue_tail_inc(tx_cq); return txcp; } @@ -2174,9 +2269,9 @@ static void be_tx_compl_clean(struct be_adapter *adapter) { u16 end_idx, notified_idx, cmpl = 0, timeo = 0, num_wrbs = 0; struct device *dev = &adapter->pdev->dev; - struct be_tx_obj *txo; + struct be_tx_compl_info *txcp; struct be_queue_info *txq; - struct be_eth_tx_compl *txcp; + struct be_tx_obj *txo; int i, pending_txqs; /* Stop polling for compls when HW has been silent for 10ms */ @@ -2187,10 +2282,10 @@ static void be_tx_compl_clean(struct be_adapter *adapter) cmpl = 0; num_wrbs = 0; txq = &txo->q; - while ((txcp = be_tx_compl_get(&txo->cq))) { - end_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, - end_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += + be_tx_compl_process(adapter, txo, + txcp->end_index); cmpl++; } if (cmpl) { @@ -2198,7 +2293,7 @@ static void be_tx_compl_clean(struct be_adapter *adapter) atomic_sub(num_wrbs, &txq->used); timeo = 0; } - if (atomic_read(&txq->used) == txo->pend_wrb_cnt) + if (!be_is_tx_compl_pending(txo)) pending_txqs--; } @@ -2247,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); } + free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2262,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); @@ -2353,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_tx_qs_create(struct be_adapter *adapter) { - struct be_queue_info *cq, *eq; + struct be_queue_info *cq; struct be_tx_obj *txo; + struct be_eq_obj *eqo; int status, i; adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter)); @@ -2372,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ - eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; - status = be_cmd_cq_create(adapter, cq, eq, false, 3); + eqo = &adapter->eq_obj[i % adapter->num_evt_qs]; + status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status; @@ -2385,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) status = be_cmd_txq_create(adapter, txo); if (status) return status; + + netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, + eqo->idx); } dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", @@ -2413,13 +2518,19 @@ static int be_rx_cqs_create(struct be_adapter *adapter) int rc, i; /* We can create as many RSS rings as there are EQs. */ - adapter->num_rx_qs = adapter->num_evt_qs; + adapter->num_rss_qs = adapter->num_evt_qs; + + /* We'll use RSS only if atleast 2 RSS rings are supported. */ + if (adapter->num_rss_qs <= 1) + adapter->num_rss_qs = 0; + + adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq; - /* We'll use RSS only if atleast 2 RSS rings are supported. - * When RSS is used, we'll need a default RXQ for non-IP traffic. + /* When the interface is not capable of RSS rings (and there is no + * need to create a default RXQ) we'll still need one RXQ */ - if (adapter->num_rx_qs > 1) - adapter->num_rx_qs++; + if (adapter->num_rx_qs == 0) + adapter->num_rx_qs = 1; adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; for_all_rx_queues(adapter, rxo, i) { @@ -2438,8 +2549,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) } dev_info(&adapter->pdev->dev, - "created %d RSS queue(s) and 1 default RX queue\n", - adapter->num_rx_qs - 1); + "created %d RX queue(s)\n", adapter->num_rx_qs); return 0; } @@ -2549,7 +2659,7 @@ loop_continue: return work_done; } -static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case BE_TX_COMP_HDR_PARSE_ERR: @@ -2564,7 +2674,7 @@ static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) } } -static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case LANCER_TX_COMP_LSO_ERR: @@ -2589,22 +2699,18 @@ static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, int idx) { - struct be_eth_tx_compl *txcp; int num_wrbs = 0, work_done = 0; - u32 compl_status; - u16 last_idx; + struct be_tx_compl_info *txcp; - while ((txcp = be_tx_compl_get(&txo->cq))) { - last_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, last_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index); work_done++; - compl_status = GET_TX_COMPL_BITS(status, txcp); - if (compl_status) { + if (txcp->status) { if (lancer_chip(adapter)) - lancer_update_tx_err(txo, compl_status); + lancer_update_tx_err(txo, txcp->status); else - be_update_tx_err(txo, compl_status); + be_update_tx_err(txo, txcp->status); } } @@ -2615,7 +2721,7 @@ static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, /* As Tx wrbs have been freed up, wake up netdev queue * if it was stopped due to lack of tx wrbs. */ if (__netif_subqueue_stopped(adapter->netdev, idx) && - atomic_read(&txo->q.used) < txo->q.len / 2) { + be_can_txq_wake(txo)) { netif_wake_subqueue(adapter->netdev, idx); } @@ -2807,12 +2913,12 @@ void be_detect_error(struct be_adapter *adapter) sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); adapter->hw_error = true; + error_detected = true; /* Do not log error messages if its a FW reset */ if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && sliport_err2 == SLIPORT_ERROR_FW_RESET2) { dev_info(dev, "Firmware update in progress\n"); } else { - error_detected = true; dev_err(dev, "Error detected in the card\n"); dev_err(dev, "ERR: sliport status 0x%x\n", sliport_status); @@ -2932,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter) status = request_irq(vec, be_msix, 0, eqo->desc, eqo); if (status) goto err_msix; + + irq_set_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -2976,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; - int i; + int i, vec; if (!adapter->isr_registered) return; @@ -2988,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter) } /* MSIx */ - for_all_evt_queues(adapter, eqo, i) - free_irq(be_msix_vec_get(adapter, eqo), eqo); + for_all_evt_queues(adapter, eqo, i) { + vec = be_msix_vec_get(adapter, eqo); + irq_set_affinity_hint(vec, NULL); + free_irq(vec, eqo); + } done: adapter->isr_registered = false; @@ -3071,12 +3182,14 @@ static int be_rx_qs_create(struct be_adapter *adapter) return rc; } - /* The FW would like the default RXQ to be created first */ - rxo = default_rxo(adapter); - rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, rx_frag_size, - adapter->if_handle, false, &rxo->rss_id); - if (rc) - return rc; + if (adapter->need_def_rxq || !adapter->num_rss_qs) { + rxo = default_rxo(adapter); + rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, + rx_frag_size, adapter->if_handle, + false, &rxo->rss_id); + if (rc) + return rc; + } for_all_rss_queues(adapter, rxo, i) { rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, @@ -3087,8 +3200,7 @@ static int be_rx_qs_create(struct be_adapter *adapter) } if (be_multi_rxq(adapter)) { - for (j = 0; j < RSS_INDIR_TABLE_LEN; - j += adapter->num_rx_qs - 1) { + for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) { for_all_rss_queues(adapter, rxo, i) { if ((j + i) >= RSS_INDIR_TABLE_LEN) break; @@ -3179,7 +3291,7 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) int status = 0; u8 mac[ETH_ALEN]; - memset(mac, 0, ETH_ALEN); + eth_zero_addr(mac); cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, @@ -3324,6 +3436,14 @@ static void be_cancel_worker(struct be_adapter *adapter) } } +static void be_cancel_err_detection(struct be_adapter *adapter) +{ + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { + cancel_delayed_work_sync(&adapter->be_err_detection_work); + adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; + } +} + static void be_mac_clear(struct be_adapter *adapter) { if (adapter->pmac_id) { @@ -3355,8 +3475,39 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) } #endif +static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs) +{ + struct be_resources res = adapter->pool_res; + u16 num_vf_qs = 1; + + /* Distribute the queue resources equally among the PF and it's VFs + * Do not distribute queue resources in multi-channel configuration. + */ + if (num_vfs && !be_is_mc(adapter)) { + /* If number of VFs requested is 8 less than max supported, + * assign 8 queue pairs to the PF and divide the remaining + * resources evenly among the VFs + */ + if (num_vfs < (be_max_vfs(adapter) - 8)) + num_vf_qs = (res.max_rss_qs - 8) / num_vfs; + else + num_vf_qs = res.max_rss_qs / num_vfs; + + /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable + * interfaces per port. Provide RSS on VFs, only if number + * of VFs requested is less than MAX_RSS_IFACES limit. + */ + if (num_vfs >= MAX_RSS_IFACES) + num_vf_qs = 1; + } + return num_vf_qs; +} + static int be_clear(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; + u16 num_vf_qs; + be_cancel_worker(adapter); if (sriov_enabled(adapter)) @@ -3365,9 +3516,14 @@ static int be_clear(struct be_adapter *adapter) /* Re-configure FW to distribute resources evenly across max-supported * number of VFs, only when VFs are not already enabled. */ - if (be_physfn(adapter) && !pci_vfs_assigned(adapter->pdev)) + if (skyhawk_chip(adapter) && be_physfn(adapter) && + !pci_vfs_assigned(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, + pci_sriov_get_totalvfs(pdev)); be_cmd_set_sriov_config(adapter, adapter->pool_res, - pci_sriov_get_totalvfs(adapter->pdev)); + pci_sriov_get_totalvfs(pdev), + num_vf_qs); + } #ifdef CONFIG_BE2NET_VXLAN be_disable_vxlan_offloads(adapter); @@ -3391,7 +3547,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | - BE_IF_FLAGS_RSS; + BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; en_flags &= cap_flags; @@ -3412,6 +3568,7 @@ static int be_vfs_if_create(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, + RESOURCE_LIMITS, vf + 1); if (!status) { cap_flags = res.if_cap_flags; @@ -3585,7 +3742,8 @@ static void BEx_get_resources(struct be_adapter *adapter, /* On a SuperNIC profile, the driver needs to use the * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits */ - be_cmd_get_profile_config(adapter, &super_nic_res, 0); + be_cmd_get_profile_config(adapter, &super_nic_res, + RESOURCE_LIMITS, 0); /* Some old versions of BE3 FW don't report max_tx_qs value */ res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS; } else { @@ -3605,6 +3763,7 @@ static void BEx_get_resources(struct be_adapter *adapter, res->max_evt_qs = 1; res->if_cap_flags = BE_IF_CAP_FLAGS_WANT; + res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS; if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS)) res->if_cap_flags &= ~BE_IF_FLAGS_RSS; } @@ -3624,13 +3783,12 @@ static void be_setup_init(struct be_adapter *adapter) static int be_get_sriov_config(struct be_adapter *adapter) { - struct device *dev = &adapter->pdev->dev; struct be_resources res = {0}; int max_vfs, old_vfs; - /* Some old versions of BE3 FW don't report max_vfs value */ - be_cmd_get_profile_config(adapter, &res, 0); + be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0); + /* Some old versions of BE3 FW don't report max_vfs value */ if (BE3_chip(adapter) && !res.max_vfs) { max_vfs = pci_sriov_get_totalvfs(adapter->pdev); res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0; @@ -3638,35 +3796,49 @@ static int be_get_sriov_config(struct be_adapter *adapter) adapter->pool_res = res; - if (!be_max_vfs(adapter)) { - if (num_vfs) - dev_warn(dev, "SRIOV is disabled. Ignoring num_vfs\n"); - adapter->num_vfs = 0; - return 0; - } - - pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); - - /* validate num_vfs module param */ + /* If during previous unload of the driver, the VFs were not disabled, + * then we cannot rely on the PF POOL limits for the TotalVFs value. + * Instead use the TotalVFs value stored in the pci-dev struct. + */ old_vfs = pci_num_vf(adapter->pdev); if (old_vfs) { - dev_info(dev, "%d VFs are already enabled\n", old_vfs); - if (old_vfs != num_vfs) - dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs); + dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n", + old_vfs); + + adapter->pool_res.max_vfs = + pci_sriov_get_totalvfs(adapter->pdev); adapter->num_vfs = old_vfs; - } else { - if (num_vfs > be_max_vfs(adapter)) { - dev_info(dev, "Resources unavailable to init %d VFs\n", - num_vfs); - dev_info(dev, "Limiting to %d VFs\n", - be_max_vfs(adapter)); - } - adapter->num_vfs = min_t(u16, num_vfs, be_max_vfs(adapter)); } return 0; } +static void be_alloc_sriov_res(struct be_adapter *adapter) +{ + int old_vfs = pci_num_vf(adapter->pdev); + u16 num_vf_qs; + int status; + + be_get_sriov_config(adapter); + + if (!old_vfs) + pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); + + /* When the HW is in SRIOV capable configuration, the PF-pool + * resources are given to PF during driver load, if there are no + * old VFs. This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. + */ + if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) { + num_vf_qs = be_calculate_vf_qs(adapter, 0); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0, + num_vf_qs); + if (status) + dev_err(&adapter->pdev->dev, + "Failed to optimize SRIOV resources\n"); + } +} + static int be_get_resources(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; @@ -3687,12 +3859,23 @@ static int be_get_resources(struct be_adapter *adapter) if (status) return status; + /* If a deafault RXQ must be created, we'll use up one RSSQ*/ + if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs && + !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)) + res.max_rss_qs -= 1; + /* If RoCE may be enabled stash away half the EQs for RoCE */ if (be_roce_supported(adapter)) res.max_evt_qs /= 2; adapter->res = res; } + /* If FW supports RSS default queue, then skip creating non-RSS + * queue for non-IP traffic. + */ + adapter->need_def_rxq = (be_if_cap_flags(adapter) & + BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1; + dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n", be_max_txqs(adapter), be_max_rxqs(adapter), be_max_rss(adapter), be_max_eqs(adapter), @@ -3701,47 +3884,33 @@ static int be_get_resources(struct be_adapter *adapter) be_max_uc(adapter), be_max_mc(adapter), be_max_vlans(adapter)); + /* Sanitize cfg_num_qs based on HW and platform limits */ + adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(), + be_max_qs(adapter)); return 0; } -static void be_sriov_config(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = be_get_sriov_config(adapter); - if (status) { - dev_err(dev, "Failed to query SR-IOV configuration\n"); - dev_err(dev, "SR-IOV cannot be enabled\n"); - return; - } - - /* When the HW is in SRIOV capable configuration, the PF-pool - * resources are equally distributed across the max-number of - * VFs. The user may request only a subset of the max-vfs to be - * enabled. Based on num_vfs, redistribute the resources across - * num_vfs so that each VF will have access to more number of - * resources. This facility is not available in BE3 FW. - * Also, this is done by FW in Lancer chip. - */ - if (be_max_vfs(adapter) && !pci_num_vf(adapter->pdev)) { - status = be_cmd_set_sriov_config(adapter, - adapter->pool_res, - adapter->num_vfs); - if (status) - dev_err(dev, "Failed to optimize SR-IOV resources\n"); - } -} - static int be_get_config(struct be_adapter *adapter) { + int status, level; u16 profile_id; - int status; + + status = be_cmd_get_cntl_attributes(adapter); + if (status) + return status; status = be_cmd_query_fw_cfg(adapter); if (status) return status; + if (BEx_chip(adapter)) { + level = be_cmd_get_fw_log_level(adapter); + adapter->msg_enable = + level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; + } + + be_cmd_get_acpi_wol_cap(adapter); + be_cmd_query_port_name(adapter); if (be_physfn(adapter)) { @@ -3751,9 +3920,6 @@ static int be_get_config(struct be_adapter *adapter) "Using profile 0x%x\n", profile_id); } - if (!BE2_chip(adapter) && be_physfn(adapter)) - be_sriov_config(adapter); - status = be_get_resources(adapter); if (status) return status; @@ -3763,9 +3929,6 @@ static int be_get_config(struct be_adapter *adapter) if (!adapter->pmac_id) return -ENOMEM; - /* Sanitize cfg_num_qs based on HW and platform limits */ - adapter->cfg_num_qs = min(adapter->cfg_num_qs, be_max_qs(adapter)); - return 0; } @@ -3799,6 +3962,13 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_schedule_err_detection(struct be_adapter *adapter) +{ + schedule_delayed_work(&adapter->be_err_detection_work, + msecs_to_jiffies(1000)); + adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; +} + static int be_setup_queues(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -3881,16 +4051,61 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } +/* If any VFs are already enabled don't FLR the PF */ +static bool be_reset_required(struct be_adapter *adapter) +{ + return pci_num_vf(adapter->pdev) ? false : true; +} + +/* Wait for the FW to be ready and perform the required initialization */ +static int be_func_init(struct be_adapter *adapter) +{ + int status; + + status = be_fw_wait_ready(adapter); + if (status) + return status; + + if (be_reset_required(adapter)) { + status = be_cmd_reset_function(adapter); + if (status) + return status; + + /* Wait for interrupts to quiesce after an FLR */ + msleep(100); + + /* We can clear all errors when function reset succeeds */ + be_clear_all_error(adapter); + } + + /* Tell FW we're ready to fire cmds */ + status = be_cmd_fw_init(adapter); + if (status) + return status; + + /* Allow interrupts for other ULPs running on NIC function */ + be_intr_set(adapter, true); + + return 0; +} + static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; int status; + status = be_func_init(adapter); + if (status) + return status; + be_setup_init(adapter); if (!lancer_chip(adapter)) be_cmd_req_native_mode(adapter); + if (!BE2_chip(adapter) && be_physfn(adapter)) + be_alloc_sriov_res(adapter); + status = be_get_config(adapter); if (status) goto err; @@ -3931,8 +4146,6 @@ static int be_setup(struct be_adapter *adapter) be_set_rx_mode(adapter->netdev); - be_cmd_get_acpi_wol_cap(adapter); - status = be_cmd_set_flow_control(adapter, adapter->tx_fc, adapter->rx_fc); if (status) @@ -4842,6 +5055,142 @@ static void be_netdev_init(struct net_device *netdev) netdev->ethtool_ops = &be_ethtool_ops; } +static void be_cleanup(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); + netif_device_detach(netdev); + if (netif_running(netdev)) + be_close(netdev); + rtnl_unlock(); + + be_clear(adapter); +} + +static int be_resume(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int status; + + status = be_setup(adapter); + if (status) + return status; + + if (netif_running(netdev)) { + status = be_open(netdev); + if (status) + return status; + } + + netif_device_attach(netdev); + + return 0; +} + +static int be_err_recover(struct be_adapter *adapter) +{ + struct device *dev = &adapter->pdev->dev; + int status; + + status = be_resume(adapter); + if (status) + goto err; + + dev_info(dev, "Adapter recovery successful\n"); + return 0; +err: + if (be_physfn(adapter)) + dev_err(dev, "Adapter recovery failed\n"); + else + dev_err(dev, "Re-trying adapter recovery\n"); + + return status; +} + +static void be_err_detection_task(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, + be_err_detection_work.work); + int status = 0; + + be_detect_error(adapter); + + if (adapter->hw_error) { + be_cleanup(adapter); + + /* As of now error recovery support is in Lancer only */ + if (lancer_chip(adapter)) + status = be_err_recover(adapter); + } + + /* Always attempt recovery on VFs */ + if (!status || be_virtfn(adapter)) + be_schedule_err_detection(adapter); +} + +static void be_log_sfp_info(struct be_adapter *adapter) +{ + int status; + + status = be_cmd_query_sfp_info(adapter); + if (!status) { + dev_err(&adapter->pdev->dev, + "Unqualified SFP+ detected on %c from %s part no: %s", + adapter->port_name, adapter->phy.vendor_name, + adapter->phy.vendor_pn); + } + adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; +} + +static void be_worker(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, work.work); + struct be_rx_obj *rxo; + int i; + + /* when interrupts are not yet enabled, just reap any pending + * mcc completions + */ + if (!netif_running(adapter->netdev)) { + local_bh_disable(); + be_process_mcc(adapter); + local_bh_enable(); + goto reschedule; + } + + if (!adapter->stats_cmd_sent) { + if (lancer_chip(adapter)) + lancer_cmd_get_pport_stats(adapter, + &adapter->stats_cmd); + else + be_cmd_get_stats(adapter, &adapter->stats_cmd); + } + + if (be_physfn(adapter) && + MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) + be_cmd_get_die_temperature(adapter); + + for_all_rx_queues(adapter, rxo, i) { + /* Replenish RX-queues starved due to memory + * allocation failures. + */ + if (rxo->rx_post_starved) + be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); + } + + be_eqd_update(adapter); + + if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) + be_log_sfp_info(adapter); + +reschedule: + adapter->work_counter++; + schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); +} + static void be_unmap_pci_bars(struct be_adapter *adapter) { if (adapter->csr) @@ -4874,6 +5223,12 @@ static int be_map_pci_bars(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; + u32 sli_intf; + + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> + SLI_INTF_FAMILY_SHIFT; + adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; if (BEx_chip(adapter) && be_physfn(adapter)) { adapter->csr = pci_iomap(pdev, 2, 0); @@ -4907,109 +5262,93 @@ pci_map_err: return -ENOMEM; } -static void be_ctrl_cleanup(struct be_adapter *adapter) +static void be_drv_cleanup(struct be_adapter *adapter) { struct be_dma_mem *mem = &adapter->mbox_mem_alloced; - - be_unmap_pci_bars(adapter); + struct device *dev = &adapter->pdev->dev; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); mem = &adapter->rx_filter; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); + + mem = &adapter->stats_cmd; + if (mem->va) + dma_free_coherent(dev, mem->size, mem->va, mem->dma); } -static int be_ctrl_init(struct be_adapter *adapter) +/* Allocate and initialize various fields in be_adapter struct */ +static int be_drv_init(struct be_adapter *adapter) { struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced; struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem; struct be_dma_mem *rx_filter = &adapter->rx_filter; - u32 sli_intf; - int status; - - pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> - SLI_INTF_FAMILY_SHIFT; - adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; - - status = be_map_pci_bars(adapter); - if (status) - goto done; + struct be_dma_mem *stats_cmd = &adapter->stats_cmd; + struct device *dev = &adapter->pdev->dev; + int status = 0; mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16; - mbox_mem_alloc->va = dma_alloc_coherent(&adapter->pdev->dev, - mbox_mem_alloc->size, + mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size, &mbox_mem_alloc->dma, GFP_KERNEL); - if (!mbox_mem_alloc->va) { - status = -ENOMEM; - goto unmap_pci_bars; - } + if (!mbox_mem_alloc->va) + return -ENOMEM; + mbox_mem_align->size = sizeof(struct be_mcc_mailbox); mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16); mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16); memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox)); rx_filter->size = sizeof(struct be_cmd_req_rx_filter); - rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev, - rx_filter->size, &rx_filter->dma, - GFP_KERNEL); + rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size, + &rx_filter->dma, GFP_KERNEL); if (!rx_filter->va) { status = -ENOMEM; goto free_mbox; } + if (lancer_chip(adapter)) + stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats); + else if (BE2_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0); + else if (BE3_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1); + else + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size, + &stats_cmd->dma, GFP_KERNEL); + if (!stats_cmd->va) { + status = -ENOMEM; + goto free_rx_filter; + } + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); - init_completion(&adapter->et_cmd_compl); - pci_save_state(adapter->pdev); - return 0; -free_mbox: - dma_free_coherent(&adapter->pdev->dev, mbox_mem_alloc->size, - mbox_mem_alloc->va, mbox_mem_alloc->dma); - -unmap_pci_bars: - be_unmap_pci_bars(adapter); - -done: - return status; -} - -static void be_stats_cleanup(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + pci_save_state(adapter->pdev); - if (cmd->va) - dma_free_coherent(&adapter->pdev->dev, cmd->size, - cmd->va, cmd->dma); -} + INIT_DELAYED_WORK(&adapter->work, be_worker); + INIT_DELAYED_WORK(&adapter->be_err_detection_work, + be_err_detection_task); -static int be_stats_init(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + adapter->rx_fc = true; + adapter->tx_fc = true; - if (lancer_chip(adapter)) - cmd->size = sizeof(struct lancer_cmd_req_pport_stats); - else if (BE2_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v0); - else if (BE3_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v1); - else - /* ALL non-BE ASICs */ - cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + /* Must be a power of 2 or else MODULO will BUG_ON */ + adapter->be_get_temp_freq = 64; - cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, - GFP_KERNEL); - if (!cmd->va) - return -ENOMEM; return 0; + +free_rx_filter: + dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma); +free_mbox: + dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va, + mbox_mem_alloc->dma); + return status; } static void be_remove(struct pci_dev *pdev) @@ -5022,7 +5361,7 @@ static void be_remove(struct pci_dev *pdev) be_roce_dev_remove(adapter); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); unregister_netdev(adapter->netdev); @@ -5031,9 +5370,8 @@ static void be_remove(struct pci_dev *pdev) /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); - be_stats_cleanup(adapter); - - be_ctrl_cleanup(adapter); + be_unmap_pci_bars(adapter); + be_drv_cleanup(adapter); pci_disable_pcie_error_reporting(pdev); @@ -5043,156 +5381,6 @@ static void be_remove(struct pci_dev *pdev) free_netdev(adapter->netdev); } -static int be_get_initial_config(struct be_adapter *adapter) -{ - int status, level; - - status = be_cmd_get_cntl_attributes(adapter); - if (status) - return status; - - /* Must be a power of 2 or else MODULO will BUG_ON */ - adapter->be_get_temp_freq = 64; - - if (BEx_chip(adapter)) { - level = be_cmd_get_fw_log_level(adapter); - adapter->msg_enable = - level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; - } - - adapter->cfg_num_qs = netif_get_num_default_rss_queues(); - return 0; -} - -static int lancer_recover_func(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = lancer_test_and_set_rdy_state(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) - be_close(adapter->netdev); - - be_clear(adapter); - - be_clear_all_error(adapter); - - status = be_setup(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) { - status = be_open(adapter->netdev); - if (status) - goto err; - } - - dev_err(dev, "Adapter recovery successful\n"); - return 0; -err: - if (status == -EAGAIN) - dev_err(dev, "Waiting for resource provisioning\n"); - else - dev_err(dev, "Adapter recovery failed\n"); - - return status; -} - -static void be_func_recovery_task(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, func_recovery_work.work); - int status = 0; - - be_detect_error(adapter); - - if (adapter->hw_error && lancer_chip(adapter)) { - rtnl_lock(); - netif_device_detach(adapter->netdev); - rtnl_unlock(); - - status = lancer_recover_func(adapter); - if (!status) - netif_device_attach(adapter->netdev); - } - - /* In Lancer, for all errors other than provisioning error (-EAGAIN), - * no need to attempt further recovery. - */ - if (!status || status == -EAGAIN) - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); -} - -static void be_log_sfp_info(struct be_adapter *adapter) -{ - int status; - - status = be_cmd_query_sfp_info(adapter); - if (!status) { - dev_err(&adapter->pdev->dev, - "Unqualified SFP+ detected on %c from %s part no: %s", - adapter->port_name, adapter->phy.vendor_name, - adapter->phy.vendor_pn); - } - adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; -} - -static void be_worker(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, work.work); - struct be_rx_obj *rxo; - int i; - - /* when interrupts are not yet enabled, just reap any pending - * mcc completions */ - if (!netif_running(adapter->netdev)) { - local_bh_disable(); - be_process_mcc(adapter); - local_bh_enable(); - goto reschedule; - } - - if (!adapter->stats_cmd_sent) { - if (lancer_chip(adapter)) - lancer_cmd_get_pport_stats(adapter, - &adapter->stats_cmd); - else - be_cmd_get_stats(adapter, &adapter->stats_cmd); - } - - if (be_physfn(adapter) && - MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) - be_cmd_get_die_temperature(adapter); - - for_all_rx_queues(adapter, rxo, i) { - /* Replenish RX-queues starved due to memory - * allocation failures. - */ - if (rxo->rx_post_starved) - be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); - } - - be_eqd_update(adapter); - - if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) - be_log_sfp_info(adapter); - -reschedule: - adapter->work_counter++; - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); -} - -/* If any VFs are already enabled don't FLR the PF */ -static bool be_reset_required(struct be_adapter *adapter) -{ - return pci_num_vf(adapter->pdev) ? false : true; -} - static char *mc_name(struct be_adapter *adapter) { char *str = ""; /* default */ @@ -5291,50 +5479,17 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) if (!status) dev_info(&pdev->dev, "PCIe error reporting enabled\n"); - status = be_ctrl_init(adapter); + status = be_map_pci_bars(adapter); if (status) goto free_netdev; - /* sync up with fw's ready state */ - if (be_physfn(adapter)) { - status = be_fw_wait_ready(adapter); - if (status) - goto ctrl_clean; - } - - if (be_reset_required(adapter)) { - status = be_cmd_reset_function(adapter); - if (status) - goto ctrl_clean; - - /* Wait for interrupts to quiesce after an FLR */ - msleep(100); - } - - /* Allow interrupts for other ULPs running on NIC function */ - be_intr_set(adapter, true); - - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); - if (status) - goto ctrl_clean; - - status = be_stats_init(adapter); - if (status) - goto ctrl_clean; - - status = be_get_initial_config(adapter); + status = be_drv_init(adapter); if (status) - goto stats_clean; - - INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->func_recovery_work, be_func_recovery_task); - adapter->rx_fc = true; - adapter->tx_fc = true; + goto unmap_bars; status = be_setup(adapter); if (status) - goto stats_clean; + goto drv_cleanup; be_netdev_init(netdev); status = register_netdev(netdev); @@ -5343,8 +5498,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); + be_schedule_err_detection(adapter); dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev), func_name(adapter), mc_name(adapter), adapter->port_name); @@ -5353,10 +5507,10 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) unsetup: be_clear(adapter); -stats_clean: - be_stats_cleanup(adapter); -ctrl_clean: - be_ctrl_cleanup(adapter); +drv_cleanup: + be_drv_cleanup(adapter); +unmap_bars: + be_unmap_pci_bars(adapter); free_netdev: free_netdev(netdev); rel_reg: @@ -5371,21 +5525,14 @@ do_none: static int be_suspend(struct pci_dev *pdev, pm_message_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; if (adapter->wol_en) be_setup_wol(adapter, true); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); - netif_device_detach(netdev); - if (netif_running(netdev)) { - rtnl_lock(); - be_close(netdev); - rtnl_unlock(); - } - be_clear(adapter); + be_cleanup(adapter); pci_save_state(pdev); pci_disable_device(pdev); @@ -5393,13 +5540,10 @@ static int be_suspend(struct pci_dev *pdev, pm_message_t state) return 0; } -static int be_resume(struct pci_dev *pdev) +static int be_pci_resume(struct pci_dev *pdev) { - int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - - netif_device_detach(netdev); + int status = 0; status = pci_enable_device(pdev); if (status) @@ -5408,30 +5552,11 @@ static int be_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - status = be_fw_wait_ready(adapter); - if (status) - return status; - - status = be_cmd_reset_function(adapter); - if (status) - return status; - - be_intr_set(adapter, true); - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_resume(adapter); if (status) return status; - be_setup(adapter); - if (netif_running(netdev)) { - rtnl_lock(); - be_open(netdev); - rtnl_unlock(); - } - - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); + be_schedule_err_detection(adapter); if (adapter->wol_en) be_setup_wol(adapter, false); @@ -5451,7 +5576,7 @@ static void be_shutdown(struct pci_dev *pdev) be_roce_dev_shutdown(adapter); cancel_delayed_work_sync(&adapter->work); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); netif_device_detach(adapter->netdev); @@ -5464,22 +5589,15 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_err(&adapter->pdev->dev, "EEH error detected\n"); if (!adapter->eeh_error) { adapter->eeh_error = true; - cancel_delayed_work_sync(&adapter->func_recovery_work); - - rtnl_lock(); - netif_device_detach(netdev); - if (netif_running(netdev)) - be_close(netdev); - rtnl_unlock(); + be_cancel_err_detection(adapter); - be_clear(adapter); + be_cleanup(adapter); } if (state == pci_channel_io_perm_failure) @@ -5530,43 +5648,73 @@ static void be_eeh_resume(struct pci_dev *pdev) { int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_info(&adapter->pdev->dev, "EEH resume\n"); pci_save_state(pdev); - status = be_cmd_reset_function(adapter); + status = be_resume(adapter); if (status) goto err; - /* On some BE3 FW versions, after a HW reset, - * interrupts will remain disabled for each function. - * So, explicitly enable interrupts + be_schedule_err_detection(adapter); + return; +err: + dev_err(&adapter->pdev->dev, "EEH resume failed\n"); +} + +static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct be_adapter *adapter = pci_get_drvdata(pdev); + u16 num_vf_qs; + int status; + + if (!num_vfs) + be_vf_clear(adapter); + + adapter->num_vfs = num_vfs; + + if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable VFs while they are assigned\n"); + return -EBUSY; + } + + /* When the HW is in SRIOV capable configuration, the PF-pool resources + * are equally distributed across the max-number of VFs. The user may + * request only a subset of the max-vfs to be enabled. + * Based on num_vfs, redistribute the resources across num_vfs so that + * each VF will have access to more number of resources. + * This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. */ - be_intr_set(adapter, true); + if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, + adapter->num_vfs, num_vf_qs); + if (status) + dev_err(&pdev->dev, + "Failed to optimize SR-IOV resources\n"); + } - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_get_resources(adapter); if (status) - goto err; + return be_cmd_status(status); - status = be_setup(adapter); + /* Updating real_num_tx/rx_queues() requires rtnl_lock() */ + rtnl_lock(); + status = be_update_queues(adapter); + rtnl_unlock(); if (status) - goto err; + return be_cmd_status(status); - if (netif_running(netdev)) { - status = be_open(netdev); - if (status) - goto err; - } + if (adapter->num_vfs) + status = be_vf_setup(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); - return; -err: - dev_err(&adapter->pdev->dev, "EEH resume failed\n"); + if (!status) + return adapter->num_vfs; + + return 0; } static const struct pci_error_handlers be_eeh_handlers = { @@ -5581,8 +5729,9 @@ static struct pci_driver be_driver = { .probe = be_probe, .remove = be_remove, .suspend = be_suspend, - .resume = be_resume, + .resume = be_pci_resume, .shutdown = be_shutdown, + .sriov_configure = be_pci_sriov_configure, .err_handler = &be_eeh_handlers }; @@ -5596,6 +5745,11 @@ static int __init be_init_module(void) rx_frag_size = 2048; } + if (num_vfs > 0) { + pr_info(DRV_NAME " : Module param num_vfs is obsolete."); + pr_info(DRV_NAME " : Use sysfs method to enable VFs\n"); + } + return pci_register_driver(&be_driver); } module_init(be_init_module); |