Diffstat (limited to 'drivers/net/ethernet/cavium/liquidio/request_manager.c')
-rw-r--r--   drivers/net/ethernet/cavium/liquidio/request_manager.c | 217
1 file changed, 135 insertions, 82 deletions
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index a2a24652c8f3..7eafa75ac095 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -51,7 +51,7 @@ struct iq_post_status {
 };
 
 static void check_db_timeout(struct work_struct *work);
-static void __check_db_timeout(struct octeon_device *oct, unsigned long iq_no);
+static void __check_db_timeout(struct octeon_device *oct, u64 iq_no);
 
 static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
 
@@ -69,12 +69,16 @@ static inline int IQ_INSTR_MODE_64B(struct octeon_device *oct, int iq_no)
 
 /* Return 0 on success, 1 on failure */
 int octeon_init_instr_queue(struct octeon_device *oct,
-			    u32 iq_no, u32 num_descs)
+			    union oct_txpciq txpciq,
+			    u32 num_descs)
 {
 	struct octeon_instr_queue *iq;
 	struct octeon_iq_config *conf = NULL;
+	u32 iq_no = (u32)txpciq.s.q_no;
 	u32 q_size;
 	struct cavium_wq *db_wq;
+	int orig_node = dev_to_node(&oct->pci_dev->dev);
+	int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
 	if (OCTEON_CN6XXX(oct))
 		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
@@ -95,9 +99,15 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 	q_size = (u32)conf->instr_type * num_descs;
 
 	iq = oct->instr_queue[iq_no];
+	iq->oct_dev = oct;
 
+	set_dev_node(&oct->pci_dev->dev, numa_node);
 	iq->base_addr = lio_dma_alloc(oct, q_size,
 				      (dma_addr_t *)&iq->base_addr_dma);
+	set_dev_node(&oct->pci_dev->dev, orig_node);
+	if (!iq->base_addr)
+		iq->base_addr = lio_dma_alloc(oct, q_size,
+					      (dma_addr_t *)&iq->base_addr_dma);
 	if (!iq->base_addr) {
 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
 			iq_no);
@@ -109,7 +119,11 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 	/* Initialize a list to holds requests that have been posted to Octeon
 	 * but has yet to be fetched by octeon
 	 */
-	iq->request_list = vmalloc(sizeof(*iq->request_list) * num_descs);
+	iq->request_list = vmalloc_node((sizeof(*iq->request_list) * num_descs),
+					numa_node);
+	if (!iq->request_list)
+		iq->request_list = vmalloc(sizeof(*iq->request_list) *
+					   num_descs);
 	if (!iq->request_list) {
 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
 		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
@@ -122,7 +136,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
 		iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
 
-	iq->iq_no = iq_no;
+	iq->txpciq.u64 = txpciq.u64;
 	iq->fill_threshold = (u32)conf->db_min;
 	iq->fill_cnt = 0;
 	iq->host_write_index = 0;
@@ -135,8 +149,11 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
 	/* Initialize the spinlock for this instruction queue */
 	spin_lock_init(&iq->lock);
+	spin_lock_init(&iq->post_lock);
 
-	oct->io_qmask.iq |= (1 << iq_no);
+	spin_lock_init(&iq->iq_flush_running_lock);
+
+	oct->io_qmask.iq |= (1ULL << iq_no);
 
 	/* Set the 32B/64B mode for each input queue */
 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
@@ -144,7 +161,9 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
 	oct->fn_list.setup_iq_regs(oct, iq_no);
 
-	oct->check_db_wq[iq_no].wq = create_workqueue("check_iq_db");
+	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
+						     WQ_MEM_RECLAIM,
+						     0);
 	if (!oct->check_db_wq[iq_no].wq) {
 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
@@ -168,7 +187,6 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
 
 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
-	flush_workqueue(oct->check_db_wq[iq_no].wq);
 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
 
 	if (OCTEON_CN6XXX(oct))
@@ -188,26 +206,38 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
 
 /* Return 0 on success, 1 on failure */
 int octeon_setup_iq(struct octeon_device *oct,
-		    u32 iq_no,
+		    int ifidx,
+		    int q_index,
+		    union oct_txpciq txpciq,
 		    u32 num_descs,
 		    void *app_ctx)
 {
+	u32 iq_no = (u32)txpciq.s.q_no;
+	int numa_node = cpu_to_node(iq_no % num_online_cpus());
+
 	if (oct->instr_queue[iq_no]) {
 		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
 			iq_no);
+		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
 		return 0;
 	}
 	oct->instr_queue[iq_no] =
-	    vmalloc(sizeof(struct octeon_instr_queue));
+		vmalloc_node(sizeof(struct octeon_instr_queue), numa_node);
+	if (!oct->instr_queue[iq_no])
+		oct->instr_queue[iq_no] =
+			vmalloc(sizeof(struct octeon_instr_queue));
 	if (!oct->instr_queue[iq_no])
 		return 1;
 
 	memset(oct->instr_queue[iq_no], 0,
 	       sizeof(struct octeon_instr_queue));
 
+	oct->instr_queue[iq_no]->q_index = q_index;
 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
-	if (octeon_init_instr_queue(oct, iq_no, num_descs)) {
+	oct->instr_queue[iq_no]->ifidx = ifidx;
+
+	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
 		vfree(oct->instr_queue[iq_no]);
 		oct->instr_queue[iq_no] = NULL;
 		return 1;
@@ -226,8 +256,8 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct)
 		instr_cnt = 0;
 
 		/*for (i = 0; i < oct->num_iqs; i++) {*/
-		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES; i++) {
-			if (!(oct->io_qmask.iq & (1UL << i)))
+		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+			if (!(oct->io_qmask.iq & (1ULL << i)))
 				continue;
 			pending =
 			    atomic_read(&oct->
@@ -364,13 +394,13 @@ __add_to_request_list(struct octeon_instr_queue *iq,
 
 int
 lio_process_iq_request_list(struct octeon_device *oct,
-			    struct octeon_instr_queue *iq)
+			    struct octeon_instr_queue *iq, u32 napi_budget)
 {
 	int reqtype;
 	void *buf;
 	u32 old = iq->flush_index;
 	u32 inst_count = 0;
-	unsigned pkts_compl = 0, bytes_compl = 0;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct octeon_soft_command *sc;
 	struct octeon_instr_irh *irh;
 
@@ -394,7 +424,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
 
-			irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+			irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
 			if (irh->rflag) {
 				/* We're expecting a response from Octeon.
 				 * It's up to lio_process_ordered_list() to
@@ -430,6 +460,9 @@ lio_process_iq_request_list(struct octeon_device *oct,
  skip_this:
 		inst_count++;
 		INCR_INDEX_BY1(old, iq->max_count);
+
+		if ((napi_budget) && (inst_count >= napi_budget))
+			break;
 	}
 	if (bytes_compl)
 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
@@ -439,38 +472,63 @@ lio_process_iq_request_list(struct octeon_device *oct,
 	return inst_count;
 }
 
-static inline void
-update_iq_indices(struct octeon_device *oct, struct octeon_instr_queue *iq)
+/* Can only be called from process context */
+int
+octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
+		u32 pending_thresh, u32 napi_budget)
 {
 	u32 inst_processed = 0;
+	u32 tot_inst_processed = 0;
+	int tx_done = 1;
 
-	/* Calculate how many commands Octeon has read and move the read index
-	 * accordingly.
-	 */
-	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(oct, iq);
+	if (!spin_trylock(&iq->iq_flush_running_lock))
+		return tx_done;
 
-	/* Move the NORESPONSE requests to the per-device completion list. */
-	if (iq->flush_index != iq->octeon_read_index)
-		inst_processed = lio_process_iq_request_list(oct, iq);
+	spin_lock_bh(&iq->lock);
 
-	if (inst_processed) {
-		atomic_sub(inst_processed, &iq->instr_pending);
-		iq->stats.instr_processed += inst_processed;
-	}
-}
+	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
 
-static void
-octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
-		u32 pending_thresh)
-{
 	if (atomic_read(&iq->instr_pending) >= (s32)pending_thresh) {
-		spin_lock_bh(&iq->lock);
-		update_iq_indices(oct, iq);
-		spin_unlock_bh(&iq->lock);
+		do {
+			/* Process any outstanding IQ packets. */
+			if (iq->flush_index == iq->octeon_read_index)
+				break;
+
+			if (napi_budget)
+				inst_processed = lio_process_iq_request_list
+					(oct, iq,
+					 napi_budget - tot_inst_processed);
+			else
+				inst_processed =
+					lio_process_iq_request_list(oct, iq, 0);
+
+			if (inst_processed) {
+				atomic_sub(inst_processed, &iq->instr_pending);
+				iq->stats.instr_processed += inst_processed;
+			}
+
+			tot_inst_processed += inst_processed;
+			inst_processed = 0;
+
+		} while (tot_inst_processed < napi_budget);
+
+		if (napi_budget && (tot_inst_processed >= napi_budget))
+			tx_done = 0;
 	}
+
+	iq->last_db_time = jiffies;
+
+	spin_unlock_bh(&iq->lock);
+
+	spin_unlock(&iq->iq_flush_running_lock);
+
+	return tx_done;
 }
 
-static void __check_db_timeout(struct octeon_device *oct, unsigned long iq_no)
+/* Process instruction queue after timeout.
+ * This routine gets called from a workqueue or when removing the module.
+ */
+static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
 {
 	struct octeon_instr_queue *iq;
 	u64 next_time;
@@ -481,24 +539,17 @@ static void __check_db_timeout(struct octeon_device *oct, unsigned long iq_no)
 	if (!iq)
 		return;
 
+	/* return immediately, if no work pending */
+	if (!atomic_read(&iq->instr_pending))
+		return;
 	/* If jiffies - last_db_time < db_timeout do nothing */
 	next_time = iq->last_db_time + iq->db_timeout;
 	if (!time_after(jiffies, (unsigned long)next_time))
 		return;
 	iq->last_db_time = jiffies;
 
-	/* Get the lock and prevent tasklets. This routine gets called from
-	 * the poll thread. Instructions can now be posted in tasklet context
-	 */
-	spin_lock_bh(&iq->lock);
-	if (iq->fill_cnt != 0)
-		ring_doorbell(oct, iq);
-
-	spin_unlock_bh(&iq->lock);
-
 	/* Flush the instruction queue */
-	if (iq->do_auto_flush)
-		octeon_flush_iq(oct, iq, 1);
+	octeon_flush_iq(oct, iq, 1, 0);
 }
 
 /* Called by the Poll thread at regular intervals to check the instruction
@@ -523,7 +574,10 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 	struct iq_post_status st;
 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
 
-	spin_lock_bh(&iq->lock);
+	/* Get the lock and prevent other tasks and tx interrupt handler from
+	 * running.
+	 */
+	spin_lock_bh(&iq->post_lock);
 
 	st = __post_command2(oct, iq, force_db, cmd);
 
@@ -539,10 +593,13 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
 	}
 
-	spin_unlock_bh(&iq->lock);
+	spin_unlock_bh(&iq->post_lock);
 
-	if (iq->do_auto_flush)
-		octeon_flush_iq(oct, iq, 2);
+	/* This is only done here to expedite packets being flushed
+	 * for cases where there are no IQ completion interrupts.
+	 */
+	/*if (iq->do_auto_flush)*/
+	/*	octeon_flush_iq(oct, iq, 2, 0);*/
 
 	return st.status;
 }
@@ -557,7 +614,7 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 			    u64 ossp1)
 {
 	struct octeon_config *oct_cfg;
-	struct octeon_instr_ih *ih;
+	struct octeon_instr_ih2 *ih2;
 	struct octeon_instr_irh *irh;
 	struct octeon_instr_rdp *rdp;
 
@@ -566,73 +623,69 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 
 	oct_cfg = octeon_get_conf(oct);
 
-	ih = (struct octeon_instr_ih *)&sc->cmd.ih;
-	ih->tagtype = ATOMIC_TAG;
-	ih->tag = LIO_CONTROL;
-	ih->raw = 1;
-	ih->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
+	ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+	ih2->tagtype = ATOMIC_TAG;
+	ih2->tag = LIO_CONTROL;
+	ih2->raw = 1;
+	ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg);
 
 	if (sc->datasize) {
-		ih->dlengsz = sc->datasize;
-		ih->rs = 1;
+		ih2->dlengsz = sc->datasize;
+		ih2->rs = 1;
 	}
 
-	irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+	irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
 	irh->opcode = opcode;
 	irh->subcode = subcode;
 
 	/* opcode/subcode specific parameters (ossp) */
 	irh->ossp = irh_ossp;
-	sc->cmd.ossp[0] = ossp0;
-	sc->cmd.ossp[1] = ossp1;
+	sc->cmd.cmd2.ossp[0] = ossp0;
+	sc->cmd.cmd2.ossp[1] = ossp1;
 
 	if (sc->rdatasize) {
-		rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
 		rdp->pcie_port = oct->pcie_port;
 		rdp->rlen = sc->rdatasize;
 
 		irh->rflag = 1;
-		irh->len = 4;
-		ih->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+		ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
 	} else {
 		irh->rflag = 0;
-		irh->len = 2;
-		ih->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+		ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
 	}
-
-	while (!(oct->io_qmask.iq & (1 << sc->iq_no)))
-		sc->iq_no++;
 }
 
 int octeon_send_soft_command(struct octeon_device *oct,
			     struct octeon_soft_command *sc)
 {
-	struct octeon_instr_ih *ih;
+	struct octeon_instr_ih2 *ih2;
 	struct octeon_instr_irh *irh;
 	struct octeon_instr_rdp *rdp;
+	u32 len;
 
-	ih = (struct octeon_instr_ih *)&sc->cmd.ih;
-	if (ih->dlengsz) {
-		BUG_ON(!sc->dmadptr);
-		sc->cmd.dptr = sc->dmadptr;
+	ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+	if (ih2->dlengsz) {
+		WARN_ON(!sc->dmadptr);
+		sc->cmd.cmd2.dptr = sc->dmadptr;
 	}
-
-	irh = (struct octeon_instr_irh *)&sc->cmd.irh;
+	irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
 	if (irh->rflag) {
 		BUG_ON(!sc->dmarptr);
 		BUG_ON(!sc->status_word);
 		*sc->status_word = COMPLETION_WORD_INIT;
 
-		rdp = (struct octeon_instr_rdp *)&sc->cmd.rdp;
+		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
 
-		sc->cmd.rptr = sc->dmarptr;
+		sc->cmd.cmd2.rptr = sc->dmarptr;
 	}
+	len = (u32)ih2->dlengsz;
 
 	if (sc->wait_time)
 		sc->timeout = jiffies + sc->wait_time;
 
-	return octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
-				   (u32)ih->dlengsz, REQTYPE_SOFT_COMMAND);
+	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
+				    len, REQTYPE_SOFT_COMMAND));
 }
 
 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)