diff options
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4')
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 131 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/sge.c | 226 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 123 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 27 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 5 |
8 files changed, 413 insertions, 117 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d57282172ea5..410ed5805a9a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -431,6 +431,7 @@ struct sge_fl { /* SGE free-buffer queue state */ struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ __be64 *desc; /* address of HW Rx descriptor ring */ dma_addr_t addr; /* bus address of HW ring start */ + u64 udb; /* BAR2 offset of User Doorbell area */ }; /* A packet gather list */ @@ -451,6 +452,7 @@ struct sge_rspq { /* state for an SGE response queue */ u8 gen; /* current generation bit */ u8 intr_params; /* interrupt holdoff parameters */ u8 next_intr_params; /* holdoff params for next interrupt */ + u8 adaptive_rx; u8 pktcnt_idx; /* interrupt packet threshold */ u8 uld; /* ULD handling this queue */ u8 idx; /* queue index within its group */ @@ -459,6 +461,7 @@ struct sge_rspq { /* state for an SGE response queue */ u16 abs_id; /* absolute SGE id for the response q */ __be64 *desc; /* address of HW response ring */ dma_addr_t phys_addr; /* physical address of the ring */ + u64 udb; /* BAR2 offset of User Doorbell area */ unsigned int iqe_len; /* entry size */ unsigned int size; /* capacity of response queue */ struct adapter *adap; @@ -516,7 +519,7 @@ struct sge_txq { int db_disabled; unsigned short db_pidx; unsigned short db_pidx_inc; - u64 udb; + u64 udb; /* BAR2 offset of User Doorbell area */ }; struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ @@ -652,6 +655,7 @@ struct adapter { struct tid_info tids; void **tid_release_head; spinlock_t tid_release_lock; + struct workqueue_struct *workq; struct work_struct tid_release_task; struct work_struct db_full_task; struct work_struct db_drop_task; @@ -964,7 +968,7 @@ void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); -int t4_wait_dev_ready(struct adapter *adap); +int t4_wait_dev_ready(void __iomem *regs); int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 1afee70ce856..5b38e955af6e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -283,6 +283,9 @@ static const struct pci_device_id cxgb4_pci_tbl[] = { CH_DEVICE(0x5083, 4), CH_DEVICE(0x5084, 4), CH_DEVICE(0x5085, 4), + CH_DEVICE(0x5086, 4), + CH_DEVICE(0x5087, 4), + CH_DEVICE(0x5088, 4), CH_DEVICE(0x5401, 4), CH_DEVICE(0x5402, 4), CH_DEVICE(0x5403, 4), @@ -310,6 +313,9 @@ static const struct pci_device_id cxgb4_pci_tbl[] = { CH_DEVICE(0x5483, 4), CH_DEVICE(0x5484, 4), CH_DEVICE(0x5485, 4), + CH_DEVICE(0x5486, 4), + CH_DEVICE(0x5487, 4), + CH_DEVICE(0x5488, 4), { 0, } }; @@ -643,8 +649,6 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) return ret; } -static struct workqueue_struct *workq; - /** * link_start - enable a port * @dev: the port to enable @@ -1255,7 +1259,9 @@ freeout: t4_free_sge_resources(adap); goto freeout; } - t4_write_reg(adap, MPS_TRC_RSS_CONTROL, + t4_write_reg(adap, is_t4(adap->params.chip) ? + MPS_TRC_RSS_CONTROL : + MPS_T5_TRC_RSS_CONTROL, RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER(s->ethrxq[0].rspq.abs_id)); return 0; @@ -1763,7 +1769,8 @@ static void get_regs(struct net_device *dev, struct ethtool_regs *regs, 0xd004, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0xea7c, - 0xf000, 0x11190, + 0xf000, 0x11110, + 0x11118, 0x11190, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, @@ -1970,7 +1977,8 @@ static void get_regs(struct net_device *dev, struct ethtool_regs *regs, 0xd004, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0x11088, - 0x1109c, 0x1117c, + 0x1109c, 0x11110, + 0x11118, 0x1117c, 0x11190, 0x11204, 0x19040, 0x1906c, 0x19078, 0x19080, @@ -2745,8 +2753,31 @@ static int set_rx_intr_params(struct net_device *dev, return 0; } +static int set_adaptive_rx_setting(struct net_device *dev, int adaptive_rx) +{ + int i; + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset]; + + for (i = 0; i < pi->nqsets; i++, q++) + q->rspq.adaptive_rx = adaptive_rx; + + return 0; +} + +static int get_adaptive_rx_setting(struct net_device *dev) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset]; + + return q->rspq.adaptive_rx; +} + static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { + set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce); return set_rx_intr_params(dev, c->rx_coalesce_usecs, c->rx_max_coalesced_frames); } @@ -2760,6 +2791,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) c->rx_coalesce_usecs = qtimer_val(adap, rq); c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ? adap->sge.counter_val[rq->pktcnt_idx] : 0; + c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev); return 0; } @@ -3340,7 +3372,7 @@ static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, adap->tid_release_head = (void **)((uintptr_t)p | chan); if (!adap->tid_release_task_busy) { adap->tid_release_task_busy = true; - queue_work(workq, &adap->tid_release_task); + queue_work(adap->workq, &adap->tid_release_task); } spin_unlock_bh(&adap->tid_release_lock); } @@ -4140,7 +4172,7 @@ void t4_db_full(struct adapter *adap) notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); t4_set_reg_field(adap, SGE_INT_ENABLE3, DBFIFO_HP_INT | DBFIFO_LP_INT, 0); - queue_work(workq, &adap->db_full_task); + queue_work(adap->workq, &adap->db_full_task); } } @@ -4150,7 +4182,7 @@ void t4_db_dropped(struct adapter *adap) disable_dbs(adap); notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); } - queue_work(workq, &adap->db_drop_task); + queue_work(adap->workq, &adap->db_drop_task); } static void uld_attach(struct adapter *adap, unsigned int uld) @@ -4388,7 +4420,6 @@ static int cxgb4_inet6addr_handler(struct notifier_block *this, * bond. We need to find such different adapters and add clip * in all of them only once. */ - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (!first_pdev) { ret = clip_add(slave->dev, ifa, event); @@ -4402,7 +4433,6 @@ static int cxgb4_inet6addr_handler(struct notifier_block *this, to_pci_dev(slave->dev->dev.parent)) ret = clip_add(slave->dev, ifa, event); } - read_unlock(&bond->lock); } else ret = clip_add(ifa->idev->dev, ifa, event); @@ -5957,7 +5987,8 @@ static int adap_init0(struct adapter *adap) params[3] = FW_PARAM_PFVF(CQ_END); params[4] = FW_PARAM_PFVF(OCQ_START); params[5] = FW_PARAM_PFVF(OCQ_END); - ret = t4_query_params(adap, 0, 0, 0, 6, params, val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, + val); if (ret < 0) goto bye; adap->vres.qp.start = val[0]; @@ -5969,7 +6000,8 @@ static int adap_init0(struct adapter *adap) params[0] = FW_PARAM_DEV(MAXORDIRD_QP); params[1] = FW_PARAM_DEV(MAXIRD_ADAPTER); - ret = t4_query_params(adap, 0, 0, 0, 2, params, val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, + val); if (ret < 0) { adap->params.max_ordird_qp = 8; adap->params.max_ird_adapter = 32 * adap->tids.ntids; @@ -6105,7 +6137,7 @@ static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); pci_cleanup_aer_uncorrect_error_status(pdev); - if (t4_wait_dev_ready(adap) < 0) + if (t4_wait_dev_ready(adap->regs) < 0) return PCI_ERS_RESULT_DISCONNECT; if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL) < 0) return PCI_ERS_RESULT_DISCONNECT; @@ -6474,6 +6506,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct port_info *pi; bool highdma = false; struct adapter *adapter = NULL; + void __iomem *regs; printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); @@ -6490,19 +6523,39 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_release_regions; } + regs = pci_ioremap_bar(pdev, 0); + if (!regs) { + dev_err(&pdev->dev, "cannot map device registers\n"); + err = -ENOMEM; + goto out_disable_device; + } + + err = t4_wait_dev_ready(regs); + if (err < 0) + goto out_unmap_bar0; + + /* We control everything through one PF */ + func = SOURCEPF_GET(readl(regs + PL_WHOAMI)); + if (func != ent->driver_data) { + iounmap(regs); + pci_disable_device(pdev); + pci_save_state(pdev); /* to restore SR-IOV later */ + goto sriov; + } + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { highdma = true; err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_err(&pdev->dev, "unable to obtain 64-bit DMA for " "coherent allocations\n"); - goto out_disable_device; + goto out_unmap_bar0; } } else { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "no usable DMA configuration\n"); - goto out_disable_device; + goto out_unmap_bar0; } } @@ -6514,26 +6567,19 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); if (!adapter) { err = -ENOMEM; - goto out_disable_device; + goto out_unmap_bar0; } - /* PCI device has been enabled */ - adapter->flags |= DEV_ENABLED; - - adapter->regs = pci_ioremap_bar(pdev, 0); - if (!adapter->regs) { - dev_err(&pdev->dev, "cannot map device registers\n"); + adapter->workq = create_singlethread_workqueue("cxgb4"); + if (!adapter->workq) { err = -ENOMEM; goto out_free_adapter; } - /* We control everything through one PF */ - func = SOURCEPF_GET(readl(adapter->regs + PL_WHOAMI)); - if (func != ent->driver_data) { - pci_save_state(pdev); /* to restore SR-IOV later */ - goto sriov; - } + /* PCI device has been enabled */ + adapter->flags |= DEV_ENABLED; + adapter->regs = regs; adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; adapter->mbox = func; @@ -6550,7 +6596,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = t4_prep_adapter(adapter); if (err) - goto out_unmap_bar0; + goto out_free_adapter; + if (!is_t4(adapter->params.chip)) { s_qpp = QUEUESPERPAGEPF1 * adapter->fn; @@ -6567,14 +6614,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "Incorrect number of egress queues per page\n"); err = -EINVAL; - goto out_unmap_bar0; + goto out_free_adapter; } adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2), pci_resource_len(pdev, 2)); if (!adapter->bar2) { dev_err(&pdev->dev, "cannot map device bar2 region\n"); err = -ENOMEM; - goto out_unmap_bar0; + goto out_free_adapter; } } @@ -6712,10 +6759,13 @@ sriov: out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); - out_unmap_bar0: - iounmap(adapter->regs); out_free_adapter: + if (adapter->workq) + destroy_workqueue(adapter->workq); + kfree(adapter); + out_unmap_bar0: + iounmap(regs); out_disable_device: pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); @@ -6736,6 +6786,11 @@ static void remove_one(struct pci_dev *pdev) if (adapter) { int i; + /* Tear down per-adapter Work Queue first since it can contain + * references to our adapter data structure. + */ + destroy_workqueue(adapter->workq); + if (is_offload(adapter)) detach_ulds(adapter); @@ -6788,20 +6843,14 @@ static int __init cxgb4_init_module(void) { int ret; - workq = create_singlethread_workqueue("cxgb4"); - if (!workq) - return -ENOMEM; - /* Debugfs support is optional, just warn if this fails */ cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); if (!cxgb4_debugfs_root) pr_warn("could not create debugfs entry, continuing\n"); ret = pci_register_driver(&cxgb4_driver); - if (ret < 0) { + if (ret < 0) debugfs_remove(cxgb4_debugfs_root); - destroy_workqueue(workq); - } register_inet6addr_notifier(&cxgb4_inet6addr_notifier); @@ -6813,8 +6862,6 @@ static void __exit cxgb4_cleanup_module(void) unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier); pci_unregister_driver(&cxgb4_driver); debugfs_remove(cxgb4_debugfs_root); /* NULL ok */ - flush_workqueue(workq); - destroy_workqueue(workq); } module_init(cxgb4_init_module); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b0bba32d69d5..5e1b314e11af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -203,6 +203,9 @@ enum { RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */ }; +static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5}; +#define MIN_NAPI_WORK 1 + static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d) { return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS; @@ -521,9 +524,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) val = PIDX(q->pend_cred / 8); if (!is_t4(adap->params.chip)) val |= DBTYPE(1); + val |= DBPRIO(1); wmb(); - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | - QID(q->cntxt_id) | val); + + /* If we're on T4, use the old doorbell mechanism; otherwise + * use the new BAR2 mechanism. + */ + if (is_t4(adap->params.chip)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + val | QID(q->cntxt_id)); + } else { + writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); + + /* This Write memory Barrier will force the write to + * the User Doorbell area to be flushed. + */ + wmb(); + } q->pend_cred &= 7; } } @@ -833,13 +850,14 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, *end = 0; } -/* This function copies 64 byte coalesced work request to - * memory mapped BAR2 space(user space writes). - * For coalesced WR SGE, fetches data from the FIFO instead of from Host. +/* This function copies a tx_desc struct to memory mapped BAR2 space(user space + * writes). For coalesced WR SGE, fetches data from the FIFO instead of from + * Host. */ -static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) +static void cxgb_pio_copy(u64 __iomem *dst, struct tx_desc *desc) { - int count = 8; + int count = sizeof(*desc) / sizeof(u64); + u64 *src = (u64 *)desc; while (count) { writeq(*src, dst); @@ -859,30 +877,63 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { - unsigned int *wr, index; - unsigned long flags; - wmb(); /* write descriptors before telling HW */ - spin_lock_irqsave(&q->db_lock, flags); - if (!q->db_disabled) { - if (is_t4(adap->params.chip)) { + + if (is_t4(adap->params.chip)) { + u32 val = PIDX(n); + unsigned long flags; + + /* For T4 we need to participate in the Doorbell Recovery + * mechanism. + */ + spin_lock_irqsave(&q->db_lock, flags); + if (!q->db_disabled) t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), - QID(q->cntxt_id) | PIDX(n)); + QID(q->cntxt_id) | val); + else + q->db_pidx_inc += n; + q->db_pidx = q->pidx; + spin_unlock_irqrestore(&q->db_lock, flags); + } else { + u32 val = PIDX_T5(n); + + /* T4 and later chips share the same PIDX field offset within + * the doorbell, but T5 and later shrank the field in order to + * gain a bit for Doorbell Priority. The field was absurdly + * large in the first place (14 bits) so we just use the T5 + * and later limits and warn if a Queue ID is too large. + */ + WARN_ON(val & DBPRIO(1)); + + /* For T5 and later we use the Write-Combine mapped BAR2 User + * Doorbell mechanism. If we're only writing a single TX + * Descriptor and TX Write Combining hasn't been disabled, we + * can use the Write Combining Gather Buffer; otherwise we use + * the simple doorbell. + */ + if (n == 1) { + int index = (q->pidx + ? (q->pidx - 1) + : (q->size - 1)); + + cxgb_pio_copy(adap->bar2 + q->udb + SGE_UDB_WCDOORBELL, + q->desc + index); } else { - if (n == 1) { - index = q->pidx ? (q->pidx - 1) : (q->size - 1); - wr = (unsigned int *)&q->desc[index]; - cxgb_pio_copy((u64 __iomem *) - (adap->bar2 + q->udb + 64), - (u64 *)wr); - } else - writel(n, adap->bar2 + q->udb + 8); - wmb(); + writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL); } - } else - q->db_pidx_inc += n; - q->db_pidx = q->pidx; - spin_unlock_irqrestore(&q->db_lock, flags); + + /* This Write Memory Barrier will force the write to the User + * Doorbell area to be flushed. This is needed to prevent + * writes on different CPUs for the same queue from hitting + * the adapter out of order. This is required when some Work + * Requests take the Write Combine Gather Buffer path (user + * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some + * take the traditional path where we simply increment the + * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the + * hardware DMA read the actual Work Request. + */ + wmb(); + } } /** @@ -1072,7 +1123,10 @@ out_free: dev_kfree_skb_any(skb); lso->c.ipid_ofst = htons(0); lso->c.mss = htons(ssi->gso_size); lso->c.seqno_offset = htonl(0); - lso->c.len = htonl(skb->len); + if (is_t4(adap->params.chip)) + lso->c.len = htonl(skb->len); + else + lso->c.len = htonl(LSO_T5_XFER_SIZE(skb->len)); cpl = (void *)(lso + 1); cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | TXPKT_IPHDR_LEN(l3hdr_len) | @@ -1916,16 +1970,40 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) unsigned int params; struct sge_rspq *q = container_of(napi, struct sge_rspq, napi); int work_done = process_responses(q, budget); + u32 val; if (likely(work_done < budget)) { + int timer_index; + napi_complete(napi); - params = q->next_intr_params; - q->next_intr_params = q->intr_params; + timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params); + + if (q->adaptive_rx) { + if (work_done > max(timer_pkt_quota[timer_index], + MIN_NAPI_WORK)) + timer_index = (timer_index + 1); + else + timer_index = timer_index - 1; + + timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1); + q->next_intr_params = QINTR_TIMER_IDX(timer_index) | + V_QINTR_CNT_EN; + params = q->next_intr_params; + } else { + params = q->next_intr_params; + q->next_intr_params = q->intr_params; + } } else params = QINTR_TIMER_IDX(7); - t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) | - INGRESSQID((u32)q->cntxt_id) | SEINTARM(params)); + val = CIDXINC(work_done) | SEINTARM(params); + if (is_t4(q->adap->params.chip)) { + t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), + val | INGRESSQID((u32)q->cntxt_id)); + } else { + writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS); + wmb(); + } return work_done; } @@ -1949,6 +2027,7 @@ static unsigned int process_intrq(struct adapter *adap) unsigned int credits; const struct rsp_ctrl *rc; struct sge_rspq *q = &adap->sge.intrq; + u32 val; spin_lock(&adap->sge.intrq_lock); for (credits = 0; ; credits++) { @@ -1967,8 +2046,14 @@ static unsigned int process_intrq(struct adapter *adap) rspq_next(q); } - t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) | - INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params)); + val = CIDXINC(credits) | SEINTARM(q->intr_params); + if (is_t4(adap->params.chip)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), + val | INGRESSQID(q->cntxt_id)); + } else { + writel(val, adap->bar2 + q->udb + SGE_UDB_GTS); + wmb(); + } spin_unlock(&adap->sge.intrq_lock); return credits; } @@ -2149,6 +2234,51 @@ static void sge_tx_timer_cb(unsigned long data) mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); } +/** + * udb_address - return the BAR2 User Doorbell address for a Queue + * @adap: the adapter + * @cntxt_id: the Queue Context ID + * @qpp: Queues Per Page (for all PFs) + * + * Returns the BAR2 address of the user Doorbell associated with the + * indicated Queue Context ID. Note that this is only applicable + * for T5 and later. + */ +static u64 udb_address(struct adapter *adap, unsigned int cntxt_id, + unsigned int qpp) +{ + u64 udb; + unsigned int s_qpp; + unsigned short udb_density; + unsigned long qpshift; + int page; + + BUG_ON(is_t4(adap->params.chip)); + + s_qpp = (QUEUESPERPAGEPF0 + + (QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn); + udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK); + qpshift = PAGE_SHIFT - ilog2(udb_density); + udb = (u64)cntxt_id << qpshift; + udb &= PAGE_MASK; + page = udb / PAGE_SIZE; + udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE; + + return udb; +} + +static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id) +{ + return udb_address(adap, cntxt_id, + t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF)); +} + +static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id) +{ + return udb_address(adap, cntxt_id, + t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF)); +} + int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, struct sge_fl *fl, rspq_handler_t hnd) @@ -2214,6 +2344,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->next_intr_params = iq->intr_params; iq->cntxt_id = ntohs(c.iqid); iq->abs_id = ntohs(c.physiqid); + if (!is_t4(adap->params.chip)) + iq->udb = udb_address_iq(adap, iq->cntxt_id); iq->size--; /* subtract status entry */ iq->netdev = dev; iq->handler = hnd; @@ -2229,6 +2361,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->pidx = fl->cidx = 0; fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0; adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl; + + /* Note, we must initialize the Free List User Doorbell + * address before refilling the Free List! + */ + if (!is_t4(adap->params.chip)) + fl->udb = udb_address_eq(adap, fl->cntxt_id); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } return 0; @@ -2254,21 +2392,8 @@ err: static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { q->cntxt_id = id; - if (!is_t4(adap->params.chip)) { - unsigned int s_qpp; - unsigned short udb_density; - unsigned long qpshift; - int page; - - s_qpp = QUEUESPERPAGEPF1 * adap->fn; - udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap, - SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)); - qpshift = PAGE_SHIFT - ilog2(udb_density); - q->udb = q->cntxt_id << qpshift; - q->udb &= PAGE_MASK; - page = q->udb / PAGE_SIZE; - q->udb += (q->cntxt_id - (page * udb_density)) * 128; - } + if (!is_t4(adap->params.chip)) + q->udb = udb_address_eq(adap, q->cntxt_id); q->in_use = 0; q->cidx = q->pidx = 0; @@ -2303,7 +2428,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, FW_EQ_ETH_CMD_PFN(adap->fn) | FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC | FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - c.viid_pkd = htonl(FW_EQ_ETH_CMD_VIID(pi->viid)); + c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE | + FW_EQ_ETH_CMD_VIID(pi->viid)); c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE(2) | FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO(1) | diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index a853133d8db8..1fff1495fe31 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -168,6 +168,34 @@ void t4_hw_pci_read_cfg4(struct adapter *adap, int reg, u32 *val) } /* + * t4_report_fw_error - report firmware error + * @adap: the adapter + * + * The adapter firmware can indicate error conditions to the host. + * If the firmware has indicated an error, print out the reason for + * the firmware error. + */ +static void t4_report_fw_error(struct adapter *adap) +{ + static const char *const reason[] = { + "Crash", /* PCIE_FW_EVAL_CRASH */ + "During Device Preparation", /* PCIE_FW_EVAL_PREP */ + "During Device Configuration", /* PCIE_FW_EVAL_CONF */ + "During Device Initialization", /* PCIE_FW_EVAL_INIT */ + "Unexpected Event", /* PCIE_FW_EVAL_UNEXPECTEDEVENT */ + "Insufficient Airflow", /* PCIE_FW_EVAL_OVERHEAT */ + "Device Shutdown", /* PCIE_FW_EVAL_DEVICESHUTDOWN */ + "Reserved", /* reserved */ + }; + u32 pcie_fw; + + pcie_fw = t4_read_reg(adap, MA_PCIE_FW); + if (pcie_fw & FW_PCIE_FW_ERR) + dev_err(adap->pdev_dev, "Firmware reports adapter error: %s\n", + reason[FW_PCIE_FW_EVAL_GET(pcie_fw)]); +} + +/* * Get the reply to a mailbox command and store it in @rpl in big-endian order. */ static void get_mbox_rpl(struct adapter *adap, __be64 *rpl, int nflit, @@ -300,6 +328,7 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, dump_mbox(adap, mbox, data_reg); dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n", *(const u8 *)cmd, mbox); + t4_report_fw_error(adap); return -ETIMEDOUT; } @@ -566,6 +595,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 +#define CHELSIO_VPD_UNIQUE_ID 0x82 /** * t4_seeprom_wp - enable/disable EEPROM write protection @@ -603,7 +633,14 @@ int get_vpd_params(struct adapter *adapter, struct vpd_params *p) ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); if (ret < 0) goto out; - addr = *vpd == 0x82 ? VPD_BASE : VPD_BASE_OLD; + + /* The VPD shall have a unique identifier specified by the PCI SIG. + * For chelsio adapters, the identifier is 0x82. The first byte of a VPD + * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software + * is expected to automatically put this entry at the + * beginning of the VPD. + */ + addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD; ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) @@ -667,6 +704,7 @@ int get_vpd_params(struct adapter *adapter, struct vpd_params *p) i = pci_vpd_info_field_size(vpd + sn - PCI_VPD_INFO_FLD_HDR_SIZE); memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN)); strim(p->sn); + i = pci_vpd_info_field_size(vpd + pn - PCI_VPD_INFO_FLD_HDR_SIZE); memcpy(p->pn, vpd + pn, min(i, PN_LEN)); strim(p->pn); @@ -1061,6 +1099,9 @@ static int t4_flash_erase_sectors(struct adapter *adapter, int start, int end) { int ret = 0; + if (end >= adapter->params.sf_nsec) + return -EINVAL; + while (start <= end) { if ((ret = sf1_write(adapter, 1, 0, 1, SF_WR_ENABLE)) != 0 || (ret = sf1_write(adapter, 4, 0, 1, @@ -1394,15 +1435,18 @@ static void pcie_intr_handler(struct adapter *adapter) int fat; - fat = t4_handle_intr_status(adapter, - PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - sysbus_intr_info) + - t4_handle_intr_status(adapter, - PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - pcie_port_intr_info) + - t4_handle_intr_status(adapter, PCIE_INT_CAUSE, - is_t4(adapter->params.chip) ? - pcie_intr_info : t5_pcie_intr_info); + if (is_t4(adapter->params.chip)) + fat = t4_handle_intr_status(adapter, + PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + sysbus_intr_info) + + t4_handle_intr_status(adapter, + PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + pcie_port_intr_info) + + t4_handle_intr_status(adapter, PCIE_INT_CAUSE, + pcie_intr_info); + else + fat = t4_handle_intr_status(adapter, PCIE_INT_CAUSE, + t5_pcie_intr_info); if (fat) t4_fatal_err(adapter); @@ -1521,6 +1565,9 @@ static void cim_intr_handler(struct adapter *adapter) int fat; + if (t4_read_reg(adapter, MA_PCIE_FW) & FW_PCIE_FW_ERR) + t4_report_fw_error(adapter); + fat = t4_handle_intr_status(adapter, CIM_HOST_INT_CAUSE, cim_intr_info) + t4_handle_intr_status(adapter, CIM_HOST_UPACC_INT_CAUSE, @@ -1768,10 +1815,16 @@ static void ma_intr_handler(struct adapter *adap) { u32 v, status = t4_read_reg(adap, MA_INT_CAUSE); - if (status & MEM_PERR_INT_CAUSE) + if (status & MEM_PERR_INT_CAUSE) { dev_alert(adap->pdev_dev, "MA parity error, parity status %#x\n", t4_read_reg(adap, MA_PARITY_ERROR_STATUS)); + if (is_t5(adap->params.chip)) + dev_alert(adap->pdev_dev, + "MA parity error, parity status %#x\n", + t4_read_reg(adap, + MA_PARITY_ERROR_STATUS2)); + } if (status & MEM_WRAP_INT_CAUSE) { v = t4_read_reg(adap, MA_INT_WRAP_STATUS); dev_alert(adap->pdev_dev, "MA address wrap-around error by " @@ -2733,12 +2786,16 @@ retry: /* * Issue the HELLO command to the firmware. If it's not successful * but indicates that we got a "busy" or "timeout" condition, retry - * the HELLO until we exhaust our retry limit. + * the HELLO until we exhaust our retry limit. If we do exceed our + * retry limit, check to see if the firmware left us any error + * information and report that if so. */ ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); if (ret < 0) { if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0) goto retry; + if (t4_read_reg(adap, MA_PCIE_FW) & FW_PCIE_FW_ERR) + t4_report_fw_error(adap); return ret; } @@ -3742,6 +3799,7 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl) lc->link_ok = link_ok; lc->speed = speed; lc->fc = fc; + lc->supported = be16_to_cpu(p->u.info.pcap); t4_os_link_changed(adap, port, link_ok); } if (mod != pi->mod_type) { @@ -3787,16 +3845,35 @@ static void init_link_config(struct link_config *lc, unsigned int caps) } } -int t4_wait_dev_ready(struct adapter *adap) +#define CIM_PF_NOACCESS 0xeeeeeeee + +int t4_wait_dev_ready(void __iomem *regs) { - if (t4_read_reg(adap, PL_WHOAMI) != 0xffffffff) + u32 whoami; + + whoami = readl(regs + PL_WHOAMI); + if (whoami != 0xffffffff && whoami != CIM_PF_NOACCESS) return 0; + msleep(500); - return t4_read_reg(adap, PL_WHOAMI) != 0xffffffff ? 0 : -EIO; + whoami = readl(regs + PL_WHOAMI); + return (whoami != 0xffffffff && whoami != CIM_PF_NOACCESS ? 0 : -EIO); } +struct flash_desc { + u32 vendor_and_model_id; + u32 size_mb; +}; + static int get_flash_params(struct adapter *adap) { + /* Table for non-Numonix supported flash parts. Numonix parts are left + * to the preexisting code. All flash parts have 64KB sectors. + */ + static struct flash_desc supported_flash[] = { + { 0x150201, 4 << 20 }, /* Spansion 4MB S25FL032P */ + }; + int ret; u32 info; @@ -3807,6 +3884,14 @@ static int get_flash_params(struct adapter *adap) if (ret) return ret; + for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret) + if (supported_flash[ret].vendor_and_model_id == info) { + adap->params.sf_size = supported_flash[ret].size_mb; + adap->params.sf_nsec = + adap->params.sf_size / SF_SEC_SIZE; + return 0; + } + if ((info & 0xff) != 0x20) /* not a Numonix flash */ return -EINVAL; info >>= 16; /* log2 of size */ @@ -3819,6 +3904,10 @@ static int get_flash_params(struct adapter *adap) adap->params.sf_size = 1 << info; adap->params.sf_fw_start = t4_read_reg(adap, CIM_BOOT_CFG) & BOOTADDR_MASK; + + if (adap->params.sf_size < FLASH_MIN_SIZE) + dev_warn(adap->pdev_dev, "WARNING!!! FLASH size %#x < %#x!!!\n", + adap->params.sf_size, FLASH_MIN_SIZE); return 0; } @@ -3837,10 +3926,6 @@ int t4_prep_adapter(struct adapter *adapter) uint16_t device_id; u32 pl_rev; - ret = t4_wait_dev_ready(adapter); - if (ret < 0) - return ret; - get_pci_mode(adapter, &adapter->params.pci); pl_rev = G_REV(t4_read_reg(adapter, PL_REV)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 35e3d8e32881..c19a90e7f7d1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -135,6 +135,7 @@ struct rsp_ctrl { #define RSPD_GEN(x) ((x) >> 7) #define RSPD_TYPE(x) (((x) >> 4) & 3) +#define V_QINTR_CNT_EN 0x0 #define QINTR_CNT_EN 0x1 #define QINTR_TIMER_IDX(x) ((x) << 1) #define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7) @@ -175,7 +176,7 @@ enum { * Location of firmware image in FLASH. */ FLASH_FW_START_SEC = 8, - FLASH_FW_NSECS = 8, + FLASH_FW_NSECS = 16, FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), @@ -206,6 +207,12 @@ enum { FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC), FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS), + /* We don't support FLASH devices which can't support the full + * standard set of sections which we need for normal + * operations. + */ + FLASH_MIN_SIZE = FLASH_CFG_START + FLASH_CFG_MAX_SIZE, + FLASH_FPGA_CFG_START_SEC = 15, FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC), diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 52e08103f221..5f4db2398c71 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -527,6 +527,7 @@ struct cpl_tx_pkt_lso_core { #define LSO_LAST_SLICE (1 << 22) #define LSO_FIRST_SLICE (1 << 23) #define LSO_OPCODE(x) ((x) << 24) +#define LSO_T5_XFER_SIZE(x) ((x) << 0) __be16 ipid_ofst; __be16 mss; __be32 seqno_offset; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index e3146e83df20..a1024db5dc13 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -72,11 +72,11 @@ #define PIDX_MASK 0x00003fffU #define PIDX_SHIFT 0 #define PIDX(x) ((x) << PIDX_SHIFT) -#define S_PIDX_T5 0 -#define M_PIDX_T5 0x1fffU -#define PIDX_T5(x) (((x) >> S_PIDX_T5) & M_PIDX_T5) +#define PIDX_SHIFT_T5 0 +#define PIDX_T5(x) ((x) << PIDX_SHIFT_T5) +#define SGE_TIMERREGS 6 #define SGE_PF_GTS 0x4 #define INGRESSQID_MASK 0xffff0000U #define INGRESSQID_SHIFT 16 @@ -157,8 +157,27 @@ #define QUEUESPERPAGEPF0_MASK 0x0000000fU #define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) +#define QUEUESPERPAGEPF0 0 #define QUEUESPERPAGEPF1 4 +/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. + * The User Doorbells are each 128 bytes in length with a Simple Doorbell at + * offsets 8x and a Write Combining single 64-byte Egress Queue Unit + * (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64. For Ingress Queues, + * we have a Going To Sleep register at offsets 8x+4. + * + * As noted above, we have many instances of the Simple Doorbell and Going To + * Sleep registers at offsets 8x and 8x+4, respectively. We want to use a + * non-64-byte aligned offset for the Simple Doorbell in order to attempt to + * avoid buffering of the writes to the Simple Doorbell and we want to use a + * non-contiguous offset for the Going To Sleep writes in order to avoid + * possible combining between them. + */ +#define SGE_UDB_SIZE 128 +#define SGE_UDB_KDOORBELL 8 +#define SGE_UDB_GTS 20 +#define SGE_UDB_WCDOORBELL 64 + #define SGE_INT_CAUSE1 0x1024 #define SGE_INT_CAUSE2 0x1030 #define SGE_INT_CAUSE3 0x103c @@ -511,6 +530,7 @@ #define MEM_WRAP_CLIENT_NUM_GET(x) (((x) & MEM_WRAP_CLIENT_NUM_MASK) >> MEM_WRAP_CLIENT_NUM_SHIFT) #define MA_PCIE_FW 0x30b8 #define MA_PARITY_ERROR_STATUS 0x77f4 +#define MA_PARITY_ERROR_STATUS2 0x7804 #define MA_EXT_MEMORY1_BAR 0x7808 #define EDC_0_BASE_ADDR 0x7900 @@ -959,6 +979,7 @@ #define TRCMULTIFILTER 0x00000001U #define MPS_TRC_RSS_CONTROL 0x9808 +#define MPS_T5_TRC_RSS_CONTROL 0xa00c #define RSSCONTROL_MASK 0x00ff0000U #define RSSCONTROL_SHIFT 16 #define RSSCONTROL(x) ((x) << RSSCONTROL_SHIFT) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0549170d7e2e..3409756a85b9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1227,6 +1227,7 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_CIDXFTHRESH(x) ((x) << 16) #define FW_EQ_ETH_CMD_EQSIZE(x) ((x) << 0) +#define FW_EQ_ETH_CMD_AUTOEQUEQE (1U << 30) #define FW_EQ_ETH_CMD_VIID(x) ((x) << 16) struct fw_eq_ctrl_cmd { @@ -2227,6 +2228,10 @@ struct fw_debug_cmd { #define FW_PCIE_FW_MASTER(x) ((x) << FW_PCIE_FW_MASTER_SHIFT) #define FW_PCIE_FW_MASTER_GET(x) (((x) >> FW_PCIE_FW_MASTER_SHIFT) & \ FW_PCIE_FW_MASTER_MASK) +#define FW_PCIE_FW_EVAL_MASK 0x7 +#define FW_PCIE_FW_EVAL_SHIFT 24 +#define FW_PCIE_FW_EVAL_GET(x) (((x) >> FW_PCIE_FW_EVAL_SHIFT) & \ + FW_PCIE_FW_EVAL_MASK) struct fw_hdr { u8 ver; |