Diffstat (limited to 'drivers/infiniband/hw/hfi1')
34 files changed, 852 insertions, 653 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index c142b23bb401..1aeea5d65c01 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); } + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); return 0; fail: diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 67052dc3100c..e0b1238d31df 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4101,10 +4109,12 @@ def_access_ibp_counter(rc_dupreq); def_access_ibp_counter(rdma_seq); def_access_ibp_counter(unaligned); def_access_ibp_counter(seq_naks); +def_access_ibp_counter(rc_crwaits); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), [C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH), [C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), [C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, @@ -4248,6 +4258,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, @@ -5119,6 +5131,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq), [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned), [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks), +[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits), [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, @@ -6860,7 +6873,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) } rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= rcd->rcvhdrtail_kvaddr ? + rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ? 
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(dd, rcvmask, rcd); hfi1_rcd_put(rcd); @@ -8392,20 +8405,62 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd) static inline int check_packet_present(struct hfi1_ctxtdata *rcd) { u32 tail; - int present; - if (!rcd->rcvhdrtail_kvaddr) - present = (rcd->seq_cnt == - rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)))); - else /* is RDMA rtail */ - present = (rcd->head != get_rcvhdrtail(rcd)); - - if (present) + if (hfi1_packet_present(rcd)) return 1; /* fall back to a CSR read, correct indpendent of DMA_RTAIL */ tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL); - return rcd->head != tail; + return hfi1_rcd_head(rcd) != tail; +} + +/** + * Common code for receive contexts interrupt handlers. + * Update traces, increment kernel IRQ counter and + * setup ASPM when needed. + */ +static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + + trace_hfi1_receive_interrupt(dd, rcd); + this_cpu_inc(*dd->int_counter); + aspm_ctx_disable(rcd); +} + +/** + * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt + * when there are packets present in the queue. When calling + * with interrupts enabled please use hfi1_rcd_eoi_intr. + * + * @rcd: valid receive context + */ +static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) +{ + clear_recv_intr(rcd); + if (check_packet_present(rcd)) + force_recv_intr(rcd); +} + +/** + * hfi1_rcd_eoi_intr() - End of Interrupt processing action + * + * @rcd: Ptr to hfi1_ctxtdata of receive context + * + * Hold IRQs so we can safely clear the interrupt and + * recheck for a packet that may have arrived after the previous + * check and the interrupt clear. If a packet arrived, force another + * interrupt. This routine can be called at the end of receive packet + * processing in interrupt service routines, interrupt service thread + * and softirqs + */ +static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) +{ + unsigned long flags; + + local_irq_save(flags); + __hfi1_rcd_eoi_intr(rcd); + local_irq_restore(flags); } /* @@ -8419,13 +8474,9 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) irqreturn_t receive_context_interrupt(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; - struct hfi1_devdata *dd = rcd->dd; int disposition; - int present; - trace_hfi1_receive_interrupt(dd, rcd); - this_cpu_inc(*dd->int_counter); - aspm_ctx_disable(rcd); + receive_interrupt_common(rcd); /* receive interrupt remains blocked while processing packets */ disposition = rcd->do_interrupt(rcd, 0); @@ -8438,17 +8489,7 @@ irqreturn_t receive_context_interrupt(int irq, void *data) if (disposition == RCV_PKT_LIMIT) return IRQ_WAKE_THREAD; - /* - * The packet processor detected no more packets. Clear the receive - * interrupt and recheck for a packet packet that may have arrived - * after the previous check and interrupt clear. If a packet arrived, - * force another interrupt. - */ - clear_recv_intr(rcd); - present = check_packet_present(rcd); - if (present) - force_recv_intr(rcd); - + __hfi1_rcd_eoi_intr(rcd); return IRQ_HANDLED; } @@ -8459,24 +8500,11 @@ irqreturn_t receive_context_interrupt(int irq, void *data) irqreturn_t receive_context_thread(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; - int present; /* receive interrupt is still blocked from the IRQ handler */ (void)rcd->do_interrupt(rcd, 1); - /* - * The packet processor will only return if it detected no more - * packets. 
Hold IRQs here so we can safely clear the interrupt and - * recheck for a packet that may have arrived after the previous - * check and the interrupt clear. If a packet arrived, force another - * interrupt. - */ - local_irq_disable(); - clear_recv_intr(rcd); - present = check_packet_present(rcd); - if (present) - force_recv_intr(rcd); - local_irq_enable(); + hfi1_rcd_eoi_intr(rcd); return IRQ_HANDLED; } @@ -10047,7 +10075,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) * the first kernel context would have been allocated by now so * we are guaranteed a valid value. */ - return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2; + return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2; } /* @@ -10092,7 +10120,7 @@ static void set_send_length(struct hfi1_pportdata *ppd) thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50), sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, - dd->rcd[0]->rcvhdrqentsize)); + get_hdrqentsize(dd->rcd[0]))); for (j = 0; j < INIT_SC_PER_VL; j++) sc_set_cr_threshold( pio_select_send_context_vl(dd, j, i), @@ -11819,7 +11847,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd) head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT; - if (rcd->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(rcd)) tail = get_rcvhdrtail(rcd); else tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL); @@ -11863,6 +11891,84 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } +/** + * encode_rcv_header_entry_size - return chip specific encoding for size + * @size: size in dwords + * + * Convert a receive header entry size that to the encoding used in the CSR. + * + * Return a zero if the given size is invalid, otherwise the encoding. 
+ */ +u8 encode_rcv_header_entry_size(u8 size) +{ + /* there are only 3 valid receive header entry sizes */ + if (size == 2) + return 1; + if (size == 16) + return 2; + if (size == 32) + return 4; + return 0; /* invalid */ +} + +/** + * hfi1_validate_rcvhdrcnt - validate hdrcnt + * @dd: the device data + * @thecnt: the header count + */ +int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) +{ + if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { + dd_dev_err(dd, "Receive header queue count too small\n"); + return -EINVAL; + } + + if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + dd_dev_err(dd, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + return -EINVAL; + } + + if (thecnt % HDRQ_INCREMENT) { + dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); + return -EINVAL; + } + + return 0; +} + +/** + * set_hdrq_regs - set header queue registers for context + * @dd: the device data + * @ctxt: the context + * @entsize: the dword entry size + * @hdrcnt: the number of header entries + */ +void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt) +{ + u64 reg; + + reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) << + RCV_HDR_CNT_CNT_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg); + reg = ((u64)encode_rcv_header_entry_size(entsize) & + RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << + RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg); + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << + RCV_HDR_SIZE_HDR_SIZE_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg); + + /* + * Program dummy tail address for every receive context + * before enabling any receive context + */ + write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, + dd->rcvhdrtail_dummy_dma); +} + void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, struct hfi1_ctxtdata *rcd) { @@ -11884,13 +11990,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, rcd->rcvhdrq_dma); - if (rcd->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(rcd)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, rcd->rcvhdrqtailaddr_dma); - rcd->seq_cnt = 1; + hfi1_set_seq_cnt(rcd, 1); /* reset the cached receive header queue head value */ - rcd->head = 0; + hfi1_set_rcd_head(rcd, 0); /* * Zero the receive header queue so we don't get false @@ -11970,7 +12076,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, IS_RCVAVAIL_START + rcd->ctxt, false); rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; } - if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) + if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd)) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b76cf81f927f..725509261016 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -358,6 +358,8 @@ #define MAX_EAGER_BUFFER (256 * 1024) #define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */ #define MAX_EXPECTED_BUFFER (2048 * 1024) +#define HFI1_MIN_HDRQ_EGRBUF_CNT 32 +#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 /* * Receive expected base and count and eager base and count increment - @@ -699,6 +701,10 @@ static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) return read_csr(dd, RCV_ARRAY_CNT); } +u8 
encode_rcv_header_entry_size(u8 size); +int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt); +void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt); + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); @@ -859,6 +865,7 @@ static inline int idx_from_vl(int vl) enum { C_RCV_OVF = 0, C_RX_LEN_ERR, + C_RX_SHORT_ERR, C_RX_ICRC_ERR, C_RX_EBP, C_RX_TID_FULL, @@ -926,6 +933,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, @@ -1245,6 +1253,7 @@ enum { C_SW_IBP_RDMA_SEQ, C_SW_IBP_UNALIGNED, C_SW_IBP_SEQ_NAK, + C_SW_IBP_RC_CRWAITS, C_SW_CPU_RC_ACKS, C_SW_CPU_RC_QACKS, C_SW_CPU_RC_DELAYED_COMP, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index ab3589d17aee..fb3ec9bff7a2 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -381,6 +381,7 @@ #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) #define RCV_LENGTH_ERR_CNT 0 +#define RCV_SHORT_ERR_CNT 2 #define RCV_ICRC_ERR_CNT 6 #define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index d47da7b0438f..40a1ff0c8a8e 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -323,6 +323,9 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 +/* MAX RcvSEQ */ +#define RHF_MAX_SEQ 13 + /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. 
word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index d268bf9c42ee..4633a0ce1a8c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -379,7 +379,7 @@ static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos) struct hfi1_devdata *dd = dd_from_dev(ibd); ++*pos; - if (!dd->rcd || *pos >= dd->n_krcv_queues) + if (!dd->rcd || *pos >= dd->num_rcv_contexts) return NULL; return pos; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01aa1f132f55..049d15befe58 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -411,14 +411,14 @@ drop: static inline void init_packet(struct hfi1_ctxtdata *rcd, struct hfi1_packet *packet) { - packet->rsize = rcd->rcvhdrqentsize; /* words */ - packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */ + packet->rsize = get_hdrqentsize(rcd); /* words */ + packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */ packet->rcd = rcd; packet->updegr = 0; packet->etail = -1; packet->rhf_addr = get_rhf_addr(rcd); packet->rhf = rhf_to_cpu(packet->rhf_addr); - packet->rhqoff = rcd->head; + packet->rhqoff = hfi1_rcd_head(rcd); packet->numpkt = 0; } @@ -551,22 +551,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata, mdata->maxcnt = packet->maxcnt; mdata->ps_head = packet->rhqoff; - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { + if (get_dma_rtail_setting(rcd)) { mdata->ps_tail = get_rcvhdrtail(rcd); if (rcd->ctxt == HFI1_CTRL_CTXT) - mdata->ps_seq = rcd->seq_cnt; + mdata->ps_seq = hfi1_seq_cnt(rcd); else mdata->ps_seq = 0; /* not used with DMA_RTAIL */ } else { mdata->ps_tail = 0; /* used only with DMA_RTAIL*/ - mdata->ps_seq = rcd->seq_cnt; + mdata->ps_seq = hfi1_seq_cnt(rcd); } } static inline int ps_done(struct ps_mdata *mdata, u64 rhf, struct hfi1_ctxtdata *rcd) { - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) + if (get_dma_rtail_setting(rcd)) return mdata->ps_head == mdata->ps_tail; return mdata->ps_seq != rhf_rcv_seq(rhf); } @@ -592,11 +592,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata, mdata->ps_head = 0; /* Control context must do seq counting */ - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) || - (rcd->ctxt == HFI1_CTRL_CTXT)) { - if (++mdata->ps_seq > 13) - mdata->ps_seq = 1; - } + if (!get_dma_rtail_setting(rcd) || + rcd->ctxt == HFI1_CTRL_CTXT) + mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq); } /* @@ -734,6 +732,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) @@ -769,7 +768,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * The +2 is the size of the RHF. 
*/ prefetch_range(packet->ebuf, - packet->tlen - ((packet->rcd->rcvhdrqentsize - + packet->tlen - ((get_hdrqentsize(packet->rcd) - (rhf_hdrq_offset(packet->rhf) + 2)) * 4)); } @@ -823,7 +822,7 @@ static inline void finish_packet(struct hfi1_packet *packet) * The only thing we need to do is a final update and call for an * interrupt */ - update_usrhead(packet->rcd, packet->rcd->head, packet->updegr, + update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr, packet->etail, rcv_intr_dynamic, packet->numpkt); } @@ -832,13 +831,11 @@ static inline void finish_packet(struct hfi1_packet *packet) */ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) { - u32 seq; int last = RCV_PKT_OK; struct hfi1_packet packet; init_packet(rcd, &packet); - seq = rhf_rcv_seq(packet.rhf); - if (seq != rcd->seq_cnt) { + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) { last = RCV_PKT_DONE; goto bail; } @@ -847,15 +844,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) while (last == RCV_PKT_OK) { last = process_rcv_packet(&packet, thread); - seq = rhf_rcv_seq(packet.rhf); - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (seq != rcd->seq_cnt) + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) last = RCV_PKT_DONE; process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: finish_packet(&packet); return last; @@ -884,15 +878,14 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread) process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: finish_packet(&packet); return last; } -static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) +static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd; u16 i; /* @@ -900,50 +893,17 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler only for that context. Otherwise, switch * interrupt handler for all statically allocated kernel contexts. */ - if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); - if (rcd) { - rcd->do_interrupt = - &handle_receive_interrupt_nodma_rtail; - hfi1_rcd_put(rcd); - } - return; - } - - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) - rcd->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) { + hfi1_rcd_get(rcd); + hfi1_set_fast(rcd); hfi1_rcd_put(rcd); - } -} - -static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) -{ - struct hfi1_ctxtdata *rcd; - u16 i; - - /* - * For dynamically allocated kernel contexts (like vnic) switch - * interrupt handler only for that context. Otherwise, switch - * interrupt handler for all statically allocated kernel contexts. 
- */ - if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); - if (rcd) { - rcd->do_interrupt = - &handle_receive_interrupt_dma_rtail; - hfi1_rcd_put(rcd); - } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) - rcd->do_interrupt = - &handle_receive_interrupt_dma_rtail; + if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)) + hfi1_set_fast(rcd); hfi1_rcd_put(rcd); } } @@ -959,17 +919,14 @@ void set_all_slowpath(struct hfi1_devdata *dd) if (!rcd) continue; if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt = &handle_receive_interrupt; + rcd->do_interrupt = rcd->slow_handler; hfi1_rcd_put(rcd); } } -static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, - struct hfi1_devdata *dd) +static bool __set_armed_to_active(struct hfi1_packet *packet) { - struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; u8 etype = rhf_rcv_type(packet->rhf); u8 sc = SC15_PACKET; @@ -984,19 +941,34 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_16B_get_sc(hdr); } if (sc != SC15_PACKET) { - int hwstate = driver_lstate(rcd->ppd); + int hwstate = driver_lstate(packet->rcd->ppd); + struct work_struct *lsaw = + &packet->rcd->ppd->linkstate_active_work; if (hwstate != IB_PORT_ACTIVE) { - dd_dev_info(dd, + dd_dev_info(packet->rcd->dd, "Unexpected link state %s\n", opa_lstate_name(hwstate)); - return 0; + return false; } - queue_work(rcd->ppd->link_wq, lsaw); - return 1; + queue_work(packet->rcd->ppd->link_wq, lsaw); + return true; } - return 0; + return false; +} + +/** + * armed to active - the fast path for armed to active + * @packet: the packet structure + * + * Return true if packet processing needs to bail. + */ +static bool set_armed_to_active(struct hfi1_packet *packet) +{ + if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED)) + return false; + return __set_armed_to_active(packet); } /* @@ -1019,10 +991,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) init_packet(rcd, &packet); - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (seq != rcd->seq_cnt) { + if (!get_dma_rtail_setting(rcd)) { + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) { last = RCV_PKT_DONE; goto bail; } @@ -1039,22 +1009,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) * Control context can potentially receive an invalid * rhf. Drop such packets. 
*/ - if (rcd->ctxt == HFI1_CTRL_CTXT) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (seq != rcd->seq_cnt) + if (rcd->ctxt == HFI1_CTRL_CTXT) + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) skip_pkt = 1; - } } prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { - if (unlikely(dd->do_drop && - atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == - DROP_PACKET_ON)) { - dd->do_drop = 0; - + if (hfi1_need_drop(dd)) { /* On to the next packet */ packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + @@ -1066,26 +1029,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = skip_rcv_packet(&packet, thread); skip_pkt = 0; } else { - /* Auto activate link on non-SC15 packet receive */ - if (unlikely(rcd->ppd->host_link_state == - HLS_UP_ARMED) && - set_armed_to_active(rcd, &packet, dd)) + if (set_armed_to_active(&packet)) goto bail; last = process_rcv_packet(&packet, thread); } - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (seq != rcd->seq_cnt) + if (!get_dma_rtail_setting(rcd)) { + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) last = RCV_PKT_DONE; - if (needset) { - dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); - set_nodma_rtail(dd, rcd->ctxt); - needset = 0; - } } else { if (packet.rhqoff == hdrqtail) last = RCV_PKT_DONE; @@ -1094,27 +1045,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) * rhf. Drop such packets. */ if (rcd->ctxt == HFI1_CTRL_CTXT) { - u32 seq = rhf_rcv_seq(packet.rhf); + bool lseq; - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (!last && (seq != rcd->seq_cnt)) + lseq = hfi1_seq_incr(rcd, + rhf_rcv_seq(packet.rhf)); + if (!last && lseq) skip_pkt = 1; } - - if (needset) { - dd_dev_info(dd, - "Switching to DMA_RTAIL\n"); - set_dma_rtail(dd, rcd->ctxt); - needset = 0; - } } + if (needset) { + needset = false; + set_all_fastpath(dd, rcd); + } process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: /* @@ -1606,23 +1554,22 @@ void handle_eflags(struct hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. 
*/ -static int process_receive_ib(struct hfi1_packet *packet) +static void process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) - return RHF_RCV_CONTINUE; + return; if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); - return RHF_RCV_CONTINUE; + return; } hfi1_ib_rcv(packet); - return RHF_RCV_CONTINUE; } static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) @@ -1638,23 +1585,23 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -static int process_receive_bypass(struct hfi1_packet *packet) +static void process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; if (hfi1_is_vnic_packet(packet)) { hfi1_vnic_bypass_rcv(packet); - return RHF_RCV_CONTINUE; + return; } if (hfi1_setup_bypass_packet(packet)) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); - return RHF_RCV_CONTINUE; + return; } if (hfi1_16B_get_l2(packet->hdr) == 0x2) { @@ -1677,17 +1624,16 @@ static int process_receive_bypass(struct hfi1_packet *packet) (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } } - return RHF_RCV_CONTINUE; } -static int process_receive_error(struct hfi1_packet *packet) +static void process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || packet->rhf & RHF_DC_ERR))) - return RHF_RCV_CONTINUE; + return; hfi1_setup_ib_header(packet); handle_eflags(packet); @@ -1695,32 +1641,29 @@ static int process_receive_error(struct hfi1_packet *packet) if (unlikely(rhf_err_flags(packet->rhf))) dd_dev_err(packet->rcd->dd, "Unhandled error packet received. Dropping.\n"); - - return RHF_RCV_CONTINUE; } -static int kdeth_process_expected(struct hfi1_packet *packet) +static void kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; if (unlikely(rhf_err_flags(packet->rhf))) { struct hfi1_ctxtdata *rcd = packet->rcd; if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) - return RHF_RCV_CONTINUE; + return; } hfi1_kdeth_expected_rcv(packet); - return RHF_RCV_CONTINUE; } -static int kdeth_process_eager(struct hfi1_packet *packet) +static void kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { @@ -1728,37 +1671,41 @@ static int kdeth_process_eager(struct hfi1_packet *packet) show_eflags_errs(packet); if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) - return RHF_RCV_CONTINUE; + return; } hfi1_kdeth_eager_rcv(packet); - return RHF_RCV_CONTINUE; } -static int process_receive_invalid(struct hfi1_packet *packet) +static void process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. 
Dropping\n", rhf_rcv_type(packet->rhf)); - return RHF_RCV_CONTINUE; } +#define HFI1_RCVHDR_DUMP_MAX 5 + void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) { struct hfi1_packet packet; struct ps_mdata mdata; + int i; - seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n", - rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize, - HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? + seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n", + rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd), + get_dma_rtail_setting(rcd) ? "dma_rtail" : "nodma_rtail", + read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL), + read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS), read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & RCV_HDR_HEAD_HEAD_MASK, - read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL)); + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL), + rcd->head); init_packet(rcd, &packet); init_ps_mdata(&mdata, &packet); - while (1) { + for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) { __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + rcd->rhf_offset; struct ib_header *hdr; diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 93613e5def9b..986c12153e62 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -141,12 +141,14 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, if (!data) return -ENOMEM; copy = min(len, datalen - 1); - if (copy_from_user(data, buf, copy)) - return -EFAULT; + if (copy_from_user(data, buf, copy)) { + ret = -EFAULT; + goto free_data; + } ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; ptr = data; token = ptr; for (ptr = data; *ptr; ptr = end + 1, token = ptr) { @@ -195,6 +197,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, ret = len; debugfs_file_put(file->f_path.dentry); +free_data: kfree(data); return ret; } @@ -214,7 +217,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, return -ENOMEM; ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; bit = find_first_bit(fault->opcodes, bitsize); while (bit < bitsize) { zero = find_next_zero_bit(fault->opcodes, bitsize, bit); @@ -232,6 +235,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, data[size - 1] = '\n'; data[size] = '\0'; ret = simple_read_from_buffer(buf, len, pos, data, size); +free_data: kfree(data); return ret; } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f9a7e9d29c8b..259115886d35 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + 
kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -505,12 +516,12 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EINVAL; goto done; } - if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) { + if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) { ret = -EPERM; goto done; } memlen = PAGE_SIZE; - memvirt = (void *)uctxt->rcvhdrtail_kvaddr; + memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt); flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -707,6 +718,7 @@ done: if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } @@ -1090,7 +1102,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); /* Setup J_KEY before enabling the context */ @@ -1138,7 +1150,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | HFI1_CAP_KGET_MASK(uctxt->flags, K2U); /* adjust flag if this fd is not able to cache */ - if (!fd->handler) + if (!fd->use_mn) cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ cinfo.num_active = hfi1_count_active_units(); @@ -1154,8 +1166,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) cinfo.send_ctxt = uctxt->sc->hw_context; cinfo.egrtids = uctxt->egrbufs.alloced; - cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2; + cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt); + cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2; cinfo.sdma_ring_size = fd->cq->nentries; cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; @@ -1543,7 +1555,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, * always resets it's tail register back to 0 on a * transition from disabled to enabled. 
*/ - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB; } else { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fa45350a9a1d..cae12f416ca0 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -197,7 +197,9 @@ struct exp_tid_set { u32 count; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); +struct hfi1_ctxtdata; +typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data); +typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct tid_queue { struct list_head queue_head; @@ -226,7 +228,11 @@ struct hfi1_ctxtdata { * be valid. Worst case is we process an extra interrupt and up to 64 * packets with the wrong interrupt handler. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + intr_handler do_interrupt; + /** fast handler after autoactive */ + intr_handler fast_handler; + /** slow handler */ + intr_handler slow_handler; /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* clear interrupt mask */ @@ -1153,6 +1159,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1310,7 +1318,7 @@ struct hfi1_devdata { struct err_info_constraint err_info_xmit_constraint; atomic_t drop_packet; - u8 do_drop; + bool do_drop; u8 err_info_uncorrectable; u8 err_info_fmconfig; @@ -1436,15 +1444,18 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; - struct mmu_rb_handler *handler; + bool use_mn; struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; @@ -1507,12 +1518,148 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ #define RCV_PKT_DONE 0x2 /* stop, no more packets detected */ +/** + * hfi1_rcd_head - add accessor for rcd head + * @rcd: the context + */ +static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd) +{ + return rcd->head; +} + +/** + * hfi1_set_rcd_head - add accessor for rcd head + * @rcd: the context + * @head: the new head + */ +static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head) +{ + rcd->head = head; +} + /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } +/* return DMA_RTAIL configuration */ +static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd) +{ + return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL); +} + +/** + * hfi1_seq_incr_wrap - wrapping increment for sequence + * @seq: the current sequence number + * + * Returns: the incremented seq + */ +static inline u8 hfi1_seq_incr_wrap(u8 seq) +{ + if (++seq > RHF_MAX_SEQ) + seq = 1; + return seq; +} + +/** + * hfi1_seq_cnt - return seq_cnt member + * @rcd: the receive context + * + * Return seq_cnt member + */ +static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd) +{ + return rcd->seq_cnt; +} + +/** + * hfi1_set_seq_cnt - return seq_cnt member + * @rcd: the receive context + * + * 
Return seq_cnt member + */ +static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt) +{ + rcd->seq_cnt = cnt; +} + +/** + * last_rcv_seq - is last + * @rcd: the receive context + * @seq: sequence + * + * return true if last packet + */ +static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq) +{ + return seq != rcd->seq_cnt; +} + +/** + * rcd_seq_incr - increment context sequence number + * @rcd: the receive context + * @seq: the current sequence number + * + * Returns: true if the this was the last packet + */ +static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq) +{ + rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt); + return last_rcv_seq(rcd, seq); +} + +/** + * get_hdrqentsize - return hdrq entry size + * @rcd: the receive context + */ +static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd) +{ + return rcd->rcvhdrqentsize; +} + +/** + * get_hdrq_cnt - return hdrq count + * @rcd: the receive context + */ +static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd) +{ + return rcd->rcvhdrq_cnt; +} + +/** + * hfi1_is_slowpath - check if this context is slow path + * @rcd: the receive context + */ +static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd) +{ + return rcd->do_interrupt == rcd->slow_handler; +} + +/** + * hfi1_is_fastpath - check if this context is fast path + * @rcd: the receive context + */ +static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd) +{ + if (rcd->ctxt == HFI1_CTRL_CTXT) + return false; + + return rcd->do_interrupt == rcd->fast_handler; +} + +/** + * hfi1_set_fast - change to the fast handler + * @rcd: the receive context + */ +static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd) +{ + if (unlikely(!rcd)) + return; + if (unlikely(!hfi1_is_fastpath(rcd))) + rcd->do_interrupt = rcd->fast_handler; +} + int hfi1_reset_device(int); void receive_interrupt_work(struct work_struct *work); @@ -2015,9 +2162,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty); +/** + * hfi1_rcvhdrtail_kvaddr - return tail kvaddr + * @rcd - the receive context + */ +static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd) +{ + return (__le64 *)rcd->rcvhdrtail_kvaddr; +} + static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { - *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL; + u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd); + + if (kv) + *kv = 0ULL; } static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) @@ -2026,7 +2185,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) * volatile because it's a DMA target from the chip, routine is * inlined, and don't want register caching or reordering. */ - return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr); + return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd)); +} + +static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd) +{ + if (likely(!rcd->rcvhdrtail_kvaddr)) { + u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))); + + return !last_rcv_seq(rcd, seq); + } + return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd); } /* @@ -2298,6 +2467,25 @@ static inline bool is_integrated(struct hfi1_devdata *dd) return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; } +/** + * hfi1_need_drop - detect need for drop + * @dd: - the device + * + * In some cases, the first packet needs to be dropped. + * + * Return true is the current packet needs to be dropped and false otherwise. 
+ */ +static inline bool hfi1_need_drop(struct hfi1_devdata *dd) +{ + if (unlikely(dd->do_drop && + atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == + DROP_PACKET_ON)) { + dd->do_drop = false; + return true; + } + return false; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 71cb9525c074..e3acda7a0800 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -78,8 +78,6 @@ */ #define HFI1_MIN_USER_CTXT_BUFCNT 7 -#define HFI1_MIN_HDRQ_EGRBUF_CNT 2 -#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ @@ -122,8 +120,6 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16 size); - DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, @@ -154,7 +150,12 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd, /* Control context must use DMA_RTAIL */ if (rcd->ctxt == HFI1_CTRL_CTXT) rcd->flags |= HFI1_CAP_DMA_RTAIL; - rcd->seq_cnt = 1; + rcd->fast_handler = get_dma_rtail_setting(rcd) ? + handle_receive_interrupt_dma_rtail : + handle_receive_interrupt_nodma_rtail; + rcd->slow_handler = handle_receive_interrupt; + + hfi1_set_seq_cnt(rcd, 1); rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); if (!rcd->sc) { @@ -511,23 +512,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) } /* - * Convert a receive header entry size that to the encoding used in the CSR. - * - * Return a zero if the given size is invalid. - */ -static inline u64 encode_rcv_header_entry_size(u16 size) -{ - /* there are only 3 valid receive header entry sizes */ - if (size == 2) - return 1; - if (size == 16) - return 2; - else if (size == 32) - return 4; - return 0; /* invalid */ -} - -/* * Select the largest ccti value over all SLs to determine the intra- * packet gap for the link. * @@ -892,10 +876,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); - dd->do_drop = 1; + dd->do_drop = true; } else { atomic_set(&dd->drop_packet, DROP_PACKET_OFF); - dd->do_drop = 0; + dd->do_drop = false; } /* make sure the link is not "up" */ @@ -1149,9 +1133,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; - if (rcd->rcvhdrtail_kvaddr) { + if (hfi1_rcvhdrtail_kvaddr(rcd)) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)rcd->rcvhdrtail_kvaddr, + (void *)hfi1_rcvhdrtail_kvaddr(rcd), rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } @@ -1489,7 +1473,6 @@ static int __init hfi1_mod_init(void) goto bail_dev; } - hfi1_compute_tid_rdma_flow_wt(); /* * These must be called before the driver is registered with * the PCI subsystem. 
@@ -1612,29 +1595,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } -static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) -{ - if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, "Receive header queue count too small\n"); - return -EINVAL; - } - - if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - return -EINVAL; - } - - if (thecnt % HDRQ_INCREMENT) { - dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", - thecnt, HDRQ_INCREMENT); - return -EINVAL; - } - - return 0; -} - static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1662,7 +1622,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Validate some global module parameters */ - ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt); + ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt); if (ret) goto bail; @@ -1843,7 +1803,6 @@ static void shutdown_one(struct pci_dev *pdev) int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { unsigned amt; - u64 reg; if (!rcd->rcvhdrq) { gfp_t gfp_flags; @@ -1875,30 +1834,9 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) goto bail_free; } } - /* - * These values are per-context: - * RcvHdrCnt - * RcvHdrEntSize - * RcvHdrSize - */ - reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT) - & RCV_HDR_CNT_CNT_MASK) - << RCV_HDR_CNT_CNT_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg); - reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize) - & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) - << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) - << RCV_HDR_SIZE_HDR_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); - /* - * Program dummy tail address for every receive context - * before enabling any receive context - */ - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_dma); + set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize, + rcd->rcvhdrq_cnt); return 0; diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index adb4a1ba921b..5836fe7b2817 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c +++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -81,7 +81,9 @@ void iowait_init(struct iowait *wait, u32 tx_limit, void iowait_cancel_work(struct iowait *w) { cancel_work_sync(&iowait_get_ib_work(w)->iowork); - cancel_work_sync(&iowait_get_tid_work(w)->iowork); + /* Make sure that the iowork for TID RDMA is used */ + if (iowait_get_tid_work(w)->iowork.func) + cancel_work_sync(&iowait_get_tid_work(w)->iowork); } /** diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 184dba3c2828..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2326,7 +2326,7 @@ struct opa_port_status_req { __be32 vl_select_mask; }; -#define VL_MASK_ALL 0x000080ff +#define VL_MASK_ALL 0x00000000000080ffUL struct opa_port_status_rsp { __u8 port_num; @@ -2625,15 +2625,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp, } static void a0_portstatus(struct hfi1_pportdata *ppd, - struct opa_port_status_rsp *rsp, u32 vl_select_mask) + struct opa_port_status_rsp *rsp) { if (!is_bx(ppd->dd)) { unsigned long vl; u64 sum_vl_xmit_wait = 0; - u32 vl_all_mask = VL_MASK_ALL; + unsigned long 
vl_all_mask = VL_MASK_ALL; - for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), - 8 * sizeof(vl_all_mask)) { + for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) { u64 tmp = sum_vl_xmit_wait + read_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl)); @@ -2730,12 +2729,12 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, (struct opa_port_status_req *)pmp->data; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_port_status_rsp *rsp; - u32 vl_select_mask = be32_to_cpu(req->vl_select_mask); + unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask); unsigned long vl; size_t response_data_size; u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; u8 port_num = req->port_num; - u8 num_vls = hweight32(vl_select_mask); + u8 num_vls = hweight64(vl_select_mask); struct _vls_pctrs *vlinfo; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -2770,7 +2769,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, hfi1_read_link_quality(dd, &rsp->link_quality_indicator); - rsp->vl_select_mask = cpu_to_be32(vl_select_mask); + rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask); rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL)); rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS, @@ -2841,8 +2840,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, * So in the for_each_set_bit() loop below, we don't need * any additional checks for vl. */ - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl)); @@ -2883,7 +2881,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, vfi++; } - a0_portstatus(ppd, rsp, vl_select_mask); + a0_portstatus(ppd, rsp); if (resp_len) *resp_len += response_data_size; @@ -2930,16 +2928,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, return error_counter_summary; } -static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp, - u32 vl_select_mask) +static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp) { if (!is_bx(ppd->dd)) { unsigned long vl; u64 sum_vl_xmit_wait = 0; - u32 vl_all_mask = VL_MASK_ALL; + unsigned long vl_all_mask = VL_MASK_ALL; - for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), - 8 * sizeof(vl_all_mask)) { + for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) { u64 tmp = sum_vl_xmit_wait + read_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl)); @@ -2994,7 +2990,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u64 port_mask; u8 port_num; unsigned long vl; - u32 vl_select_mask; + unsigned long vl_select_mask; int vfi; u16 link_width; u16 link_speed; @@ -3071,8 +3067,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, * So in the for_each_set_bit() loop below, we don't need * any additional checks for vl. 
*/ - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(req->vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); rsp->vls[vfi].port_vl_xmit_data = @@ -3120,7 +3115,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, vfi++; } - a0_datacounters(ppd, rsp, vl_select_mask); + a0_datacounters(ppd, rsp); if (resp_len) *resp_len += response_data_size; @@ -3215,7 +3210,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, struct _vls_ectrs *vlinfo; unsigned long vl; u64 port_mask, tmp; - u32 vl_select_mask; + unsigned long vl_select_mask; int vfi; req = (struct opa_port_error_counters64_msg *)pmp->data; @@ -3273,8 +3268,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, vlinfo = &rsp->vls[0]; vfi = 0; vl_select_mask = be32_to_cpu(req->vl_select_mask); - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(req->vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); rsp->vls[vfi].port_vl_xmit_discards = cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, @@ -3485,7 +3479,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; u64 portn = be64_to_cpu(req->port_select_mask[3]); u32 counter_select = be32_to_cpu(req->counter_select_mask); - u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */ + unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */ unsigned long vl; if ((nports != 1) || (portn != 1 << port)) { @@ -3579,8 +3573,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_UNCORRECTABLE_ERRORS) write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0); - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { if (counter_select & CS_PORT_XMIT_DATA) write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0); @@ -4922,16 +4915,11 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: - if (unlikely(in_mad_size != sizeof(struct opa_mad))) { - dev_err(ibdev->dev.parent, "invalid in_mad_size\n"); - return IB_MAD_RESULT_FAILURE; - } return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, (struct opa_mad *)in_mad, @@ -4939,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d920b165d696..db82db497b2c 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -115,13 +115,11 @@ int msix_initialize(struct hfi1_devdata *dd) */ static int msix_request_irq(struct hfi1_devdata 
*dd, void *arg, irq_handler_t handler, irq_handler_t thread, - u32 idx, enum irq_type type) + enum irq_type type, const char *name) { unsigned long nr; int irq; int ret; - const char *err_info; - char name[MAX_NAME_SIZE]; struct hfi1_msix_entry *me; /* Allocate an MSIx vector */ @@ -135,43 +133,15 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, if (nr == dd->msix_info.max_requested) return -ENOSPC; - /* Specific verification and determine the name */ - switch (type) { - case IRQ_GENERAL: - /* general interrupt must be MSIx vector 0 */ - if (nr) { - spin_lock(&dd->msix_info.msix_lock); - __clear_bit(nr, dd->msix_info.in_use_msix); - spin_unlock(&dd->msix_info.msix_lock); - dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n", - nr); - return -EINVAL; - } - snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); - err_info = "general"; - break; - case IRQ_SDMA: - snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", - dd->unit, idx); - err_info = "sdma"; - break; - case IRQ_RCVCTXT: - snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", - dd->unit, idx); - err_info = "receive context"; - break; - case IRQ_OTHER: - default: + if (type < IRQ_SDMA || type >= IRQ_OTHER) return -EINVAL; - } - name[sizeof(name) - 1] = 0; irq = pci_irq_vector(dd->pcidev, nr); ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); if (ret) { dd_dev_err(dd, - "%s: request for IRQ %d failed, MSIx %d, err %d\n", - err_info, irq, idx, ret); + "%s: request for IRQ %d failed, MSIx %lu, err %d\n", + name, irq, nr, ret); spin_lock(&dd->msix_info.msix_lock); __clear_bit(nr, dd->msix_info.in_use_msix); spin_unlock(&dd->msix_info.msix_lock); @@ -195,17 +165,13 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, return nr; } -/** - * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs - * @rcd: valid rcd context - * - */ -int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) +static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd, + irq_handler_t handler, + irq_handler_t thread, + const char *name) { - int nr; - - nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt, - receive_context_thread, rcd->ctxt, IRQ_RCVCTXT); + int nr = msix_request_irq(rcd->dd, rcd, handler, thread, + IRQ_RCVCTXT, name); if (nr < 0) return nr; @@ -222,6 +188,22 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) } /** + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs + * @rcd: valid rcd context + * + */ +int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) +{ + char name[MAX_NAME_SIZE]; + + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", + rcd->dd->unit, rcd->ctxt); + + return msix_request_rcd_irq_common(rcd, receive_context_interrupt, + receive_context_thread, name); +} + +/** * msix_request_smda_ira() - Helper for getting SDMA IRQ resources * @sde: valid sdma engine * @@ -229,9 +211,12 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) int msix_request_sdma_irq(struct sdma_engine *sde) { int nr; + char name[MAX_NAME_SIZE]; + snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", + sde->dd->unit, sde->this_idx); nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL, - sde->this_idx, IRQ_SDMA); + IRQ_SDMA, name); if (nr < 0) return nr; sde->msix_intr = nr; @@ -241,6 +226,32 @@ int msix_request_sdma_irq(struct sdma_engine *sde) } /** + * msix_request_general_irq(void) - Helper for getting general IRQ + * resources + * @dd: valid device data + */ +int msix_request_general_irq(struct hfi1_devdata *dd) +{ + int nr; + char name[MAX_NAME_SIZE]; + + 
snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); + nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL, + name); + if (nr < 0) + return nr; + + /* general interrupt must be MSIx vector 0 */ + if (nr) { + msix_free_irq(dd, (u8)nr); + dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr); + return -EINVAL; + } + + return 0; +} + +/** * enable_sdma_src() - Helper to enable SDMA IRQ srcs * @dd: valid devdata structure * @i: index of SDMA engine @@ -265,10 +276,9 @@ static void enable_sdma_srcs(struct hfi1_devdata *dd, int i) int msix_request_irqs(struct hfi1_devdata *dd) { int i; - int ret; + int ret = msix_request_general_irq(dd); - ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL); - if (ret < 0) + if (ret) return ret; for (i = 0; i < dd->num_sdma; i++) { diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h index a514881632a4..1a02ab7971c8 100644 --- a/drivers/infiniband/hw/hfi1/msix.h +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -54,6 +54,7 @@ int msix_initialize(struct hfi1_devdata *dd); int msix_request_irqs(struct hfi1_devdata *dd); void msix_clean_up_interrupts(struct hfi1_devdata *dd); +int msix_request_general_irq(struct hfi1_devdata *dd); int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); int msix_request_sdma_irq(struct sdma_engine *sde); void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 61aa5504d7c3..1a6268d61977 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -161,7 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -EINVAL; } - dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY); + dd->kregbase1 = ioremap(addr, RCV_ARRAY); if (!dd->kregbase1) { dd_dev_err(dd, "UC mapping of kregbase1 failed\n"); return -ENOMEM; @@ -179,7 +179,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count); dd->base2_start = RCV_ARRAY + rcv_array_count * 8; - dd->kregbase2 = ioremap_nocache( + dd->kregbase2 = ioremap( addr + dd->base2_start, TXE_PIO_SEND - dd->base2_start); if (!dd->kregbase2) { @@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbf7faa5038c..36593f2efe26 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, u32 config_data, const char *message) { u8 i; - int ret = HCMD_SUCCESS; + int ret; for (i = 0; i < 4; i++) { ret = load_8051_config(ppd->dd, field_id, i, config_data); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 024a7c2b6124..f1734e5e9ac4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -595,11 +595,8 @@ check_s_state: case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_INV: /* If no credit, return. 
*/ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } if (len > pmtu) { qp->s_state = OP(SEND_FIRST); len = pmtu; @@ -632,11 +629,8 @@ check_s_state: goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, @@ -1483,6 +1477,11 @@ static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn, req->ack_pending = cur_seg - req->comp_seg; priv->pending_tid_r_segs += req->ack_pending; qp->s_num_rd_atomic += req->ack_pending; + trace_hfi1_tid_req_update_num_rd_atomic(qp, 0, + wqe->wr.opcode, + wqe->psn, + wqe->lpsn, + req); } else { priv->pending_tid_r_segs += req->total_segs; qp->s_num_rd_atomic += req->total_segs; @@ -2210,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_flags & RVT_S_WAIT_RNR) goto bail_stop; rdi = ib_to_rvt(qp->ibqp.device); - if (qp->s_rnr_retry == 0 && - !((rdi->post_parms[wqe->wr.opcode].flags & - RVT_OPERATION_IGN_RNR_CNT) && - qp->s_rnr_retry_cnt == 0)) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; + if (!(rdi->post_parms[wqe->wr.opcode].flags & + RVT_OPERATION_IGN_RNR_CNT)) { + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) + qp->s_rnr_retry--; } - if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) - qp->s_rnr_retry--; /* * The last valid PSN is the previous PSN. For TID RDMA WRITE @@ -2600,7 +2599,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, * to be sent before sending this one. 
*/ e = NULL; - old_req = 1; + old_req = true; ibp->rvp.n_rc_dupreq++; spin_lock_irqsave(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..a51525647ac8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -65,6 +65,7 @@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32)) static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@ -847,7 +848,7 @@ static const struct rhashtable_params sdma_rht_params = { .nelem_hint = NR_CPUS_HINT, .head_offset = offsetof(struct sdma_rht_node, node), .key_offset = offsetof(struct sdma_rht_node, cpu_id), - .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .key_len = sizeof_field(struct sdma_rht_node, cpu_id), .max_size = NR_CPUS, .min_size = 8, .automatic_shrinking = true, @@ -880,8 +881,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, - sdma_rht_params); + rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, + sdma_rht_params); if (rht_node && rht_node->map[vl]) { struct sdma_rht_map_elem *map = rht_node->map[vl]; @@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) struct sdma_engine *sde; if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } /* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); @@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 996fc298207e..8a2e0d9351e9 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -107,8 +107,6 @@ static u32 mask_generation(u32 a) * C - Capcode */ -static u32 tid_rdma_flow_wt; - static void tid_rdma_trigger_resume(struct work_struct *work); static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, @@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet, struct tid_rdma_flow *flow, bool fecn); +static void validate_r_tid_ack(struct hfi1_qp_priv *priv) +{ + if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) + priv->r_tid_ack = priv->r_tid_tail; +} + +static void tid_rdma_schedule_ack(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + priv->s_flags |= RVT_S_ACK_PENDING; + hfi1_schedule_tid_send(qp); +} + +static void tid_rdma_trigger_ack(struct rvt_qp *qp) +{ + validate_r_tid_ack(qp->priv); + tid_rdma_schedule_ack(qp); +} + static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { return @@ -2574,18 +2592,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) hfi1_kern_clear_hw_flow(priv->rcd, qp); } 
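As context for the hunks that follow: the three small helpers introduced at the top of tid_rdma.c (validate_r_tid_ack(), tid_rdma_schedule_ack() and tid_rdma_trigger_ack()) factor out a sequence that later hunks in this patch substitute at several NAK/ACK sites. Below is a condensed, kernel-style sketch (not part of the patch and not compilable on its own) of how a NAK path is expected to use the consolidated helper; the wrapper function and its second argument are hypothetical, while the field, flag and helper names are taken from the hunks in this section.

/*
 * Hypothetical NAK path illustrating the consolidated helper.  The
 * qp priv fields (s_nak_state, s_nak_psn) and IB_NAK_PSN_ERROR are the
 * ones used by the nak_psn hunk later in this patch.
 */
static void example_nak_psn_path(struct rvt_qp *qp, u32 next_psn)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	/* Record the NAK state only once per outstanding NAK. */
	if (!qpriv->s_nak_state) {
		qpriv->s_nak_state = IB_NAK_PSN_ERROR;
		qpriv->s_nak_psn = mask_psn(next_psn);
		/*
		 * tid_rdma_trigger_ack() initializes r_tid_ack if it is
		 * still HFI1_QP_WQE_INVALID, sets RVT_S_ACK_PENDING and
		 * schedules the TID send engine - the three steps that
		 * were previously open coded at each NAK/ACK site.
		 */
		tid_rdma_trigger_ack(qp);
	}
}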
-static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, u8 rcv_type, - u8 opcode) +static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type) { struct rvt_qp *qp = packet->qp; - struct hfi1_qp_priv *qpriv = qp->priv; - u32 ipsn; - struct ib_other_headers *ohdr = packet->ohdr; - struct rvt_ack_entry *e; - struct tid_rdma_request *req; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); - u32 i; if (rcv_type >= RHF_RCV_TYPE_IB) goto done; @@ -2602,41 +2611,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, if (rcv_type == RHF_RCV_TYPE_EAGER) { hfi1_restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); - goto done_unlock; - } - - /* - * For TID READ response, error out QP after freeing the tid - * resources. - */ - if (opcode == TID_OP(READ_RESP)) { - ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn)); - if (cmp_psn(ipsn, qp->s_last_psn) > 0 && - cmp_psn(ipsn, qp->s_psn) < 0) { - hfi1_kern_read_tid_flow_free(qp); - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - goto done; - } - goto done_unlock; } - /* - * Error out the qp for TID RDMA WRITE - */ - hfi1_kern_clear_hw_flow(qpriv->rcd, qp); - for (i = 0; i < rvt_max_atomic(rdi); i++) { - e = &qp->s_ack_queue[i]; - if (e->opcode == TID_OP(WRITE_REQ)) { - req = ack_to_tid_req(e); - hfi1_kern_exp_rcv_clear_all(req); - } - } - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_LEN_ERR); - goto done; - -done_unlock: + /* Since no payload is delivered, just drop the packet */ spin_unlock(&qp->s_lock); done: return true; @@ -2687,12 +2664,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, u32 fpsn; lockdep_assert_held(&qp->r_lock); + trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn); + trace_hfi1_sender_read_kdeth_eflags(qp); + trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0); + spin_lock(&qp->s_lock); /* If the psn is out of valid range, drop the packet */ if (cmp_psn(ibpsn, qp->s_last_psn) < 0 || cmp_psn(ibpsn, qp->s_psn) > 0) - return ret; + goto s_unlock; - spin_lock(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -2740,14 +2720,19 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, wqe = do_rc_completion(qp, wqe, ibp); if (qp->s_acked == qp->s_tail) - break; + goto s_unlock; } + if (qp->s_acked == qp->s_tail) + goto s_unlock; + /* Handle the eflags for the request */ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) goto s_unlock; req = wqe_to_tid_req(wqe); + trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn, + wqe->lpsn, req); switch (rcv_type) { case RHF_RCV_TYPE_EXPECTED: switch (rte) { @@ -2762,15 +2747,13 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, * packets that could be still in the fabric. 
*/ flow = &req->flows[req->clear_tail]; + trace_hfi1_tid_flow_read_kdeth_eflags(qp, + req->clear_tail, + flow); if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, flow->flow_state.r_next_psn); if (diff > 0) { - if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) - restart_tid_rdma_read_req(rcd, - qp, - wqe); - /* Drop the packet.*/ goto s_unlock; } else if (diff < 0) { @@ -2922,7 +2905,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (lnh == HFI1_LRH_GRH) goto r_unlock; - if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode)) + if (tid_rdma_tid_err(packet, rcv_type)) goto r_unlock; } @@ -2942,8 +2925,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, */ spin_lock(&qp->s_lock); qpriv = qp->priv; + if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID || + qpriv->r_tid_tail == qpriv->r_tid_head) + goto unlock; e = &qp->s_ack_queue[qpriv->r_tid_tail]; + if (e->opcode != TID_OP(WRITE_REQ)) + goto unlock; req = ack_to_tid_req(e); + if (req->comp_seg == req->cur_seg) + goto unlock; flow = &req->flows[req->clear_tail]; trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn); trace_hfi1_rsp_handle_kdeth_eflags(qp, psn); @@ -3033,10 +3023,7 @@ nak_psn: qpriv->s_nak_state = IB_NAK_PSN_ERROR; /* We are NAK'ing the next expected PSN */ qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); - qpriv->s_flags |= RVT_S_ACK_PENDING; - if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID) - qpriv->r_tid_ack = qpriv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto unlock; } @@ -3399,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); } -void hfi1_compute_tid_rdma_flow_wt(void) +static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp) { /* * Heuristic for computing the RNR timeout when waiting on the flow * queue. Rather than a computationaly expensive exact estimate of when * a flow will be available, we assume that if a QP is at position N in * the flow queue it has to wait approximately (N + 1) * (number of - * segments between two sync points), assuming PMTU of 4K. The rationale - * for this is that flows are released and recycled at each sync point. + * segments between two sync points). The rationale for this is that + * flows are released and recycled at each sync point. */ - tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / - TID_RDMA_MAX_SEGMENT_SIZE; + return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT; } static u32 position_in_queue(struct hfi1_qp_priv *qpriv, @@ -3533,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); if (ret) { - to_seg = tid_rdma_flow_wt * + to_seg = hfi1_compute_tid_rdma_flow_wt(qp) * position_in_queue(qpriv, &rcd->flow_queue); break; @@ -3554,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) /* * If overtaking req->acked_tail, send an RNR NAK. 
Because the * QP is not queued in this case, and the issue can only be - * caused due a delay in scheduling the second leg which we + * caused by a delay in scheduling the second leg which we * cannot estimate, we use a rather arbitrary RNR timeout of * (MAX_FLOWS / 2) segments */ @@ -3562,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) MAX_FLOWS)) { ret = -EAGAIN; to_seg = MAX_FLOWS >> 1; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); break; } @@ -4363,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, req); trace_hfi1_tid_write_rsp_rcv_data(qp); - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; + validate_r_tid_ack(priv); if (opcode == TID_OP(WRITE_DATA_LAST)) { release_rdma_sge_mr(e); @@ -4403,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) } done: - priv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_schedule_ack(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; if (fecn) @@ -4416,10 +4399,7 @@ send_nak: if (!priv->s_nak_state) { priv->s_nak_state = IB_NAK_PSN_ERROR; priv->s_nak_psn = flow->flow_state.r_next_psn; - priv->s_flags |= RVT_S_ACK_PENDING; - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto done; } @@ -4509,7 +4489,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4538,6 +4518,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ack_kpsn--; } + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_op_err; + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE) @@ -4550,7 +4533,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); /* Drop stale ACK/NAK */ - if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0) + if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 || + cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0) goto ack_op_err; while (cmp_psn(ack_kpsn, @@ -4649,6 +4633,15 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) */ fpsn = full_flow_psn(flow, flow->flow_state.spsn); req->r_ack_psn = psn; + /* + * If resync_psn points to the last flow PSN for a + * segment and the new segment (likely from a new + * request) starts with a new generation number, we + * need to adjust resync_psn accordingly. 
+ */ + if (flow->flow_state.generation != + (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT)) + resync_psn = mask_psn(fpsn - 1); flow->resync_npkts += delta_psn(mask_psn(resync_psn + 1), fpsn); /* @@ -4712,7 +4705,12 @@ done: switch ((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ + if (!req->flows) + break; flow = &req->flows[req->acked_tail]; + flpsn = full_flow_psn(flow, flow->flow_state.lpsn); + if (cmp_psn(psn, flpsn) > 0) + break; trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); @@ -4958,8 +4956,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->resync = true; /* RESYNC request always gets a TID RDMA ACK. */ qpriv->s_nak_state = 0; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); bail: if (fecn) qp->s_flags |= RVT_S_ECN; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 1c536185261e..6e82df2190b7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -17,6 +17,7 @@ #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) +#define TID_RDMA_SEGMENT_SHIFT 18 /* * Bit definitions for priv->s_flags. @@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); -void hfi1_compute_tid_rdma_flow_wt(void); - void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index e00c8a7d559c..b5fc5c6cd52f 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -80,7 +80,7 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->credits = uctxt->sc->credits; __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; + __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt); __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 3cec960e9674..168079ed122c 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -106,19 +106,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = rcd->ctxt; - if (rcd->do_interrupt == - &handle_receive_interrupt) { - __entry->slow_path = 1; - __entry->dma_rtail = 0xFF; - } else if (rcd->do_interrupt == - &handle_receive_interrupt_dma_rtail){ - __entry->dma_rtail = 1; - __entry->slow_path = 0; - } else if (rcd->do_interrupt == - &handle_receive_interrupt_nodma_rtail) { - __entry->dma_rtail = 0; - __entry->slow_path = 0; - } + __entry->slow_path = hfi1_is_slowpath(rcd); + __entry->dma_rtail = get_dma_rtail_setting(rcd); ), TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", __get_str(dev), diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 4388b594ed1b..985ffa9cc958 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -138,10 +138,10 @@ TRACE_EVENT(/* put_tid */ TP_ARGS(dd, 
index, type, pa, order), TP_STRUCT__entry(/* entry */ DD_DEV_ENTRY(dd) - __field(unsigned long, pa); - __field(u32, index); - __field(u32, type); - __field(u16, order); + __field(unsigned long, pa) + __field(u32, index) + __field(u32, type) + __field(u16, order) ), TP_fast_assign(/* assign */ DD_DEV_ASSIGN(dd); @@ -627,6 +627,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, index, flow) ); +DEFINE_EVENT(/* event */ + hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow), + TP_ARGS(qp, index, flow) +); + DECLARE_EVENT_CLASS(/* tid_node */ hfi1_tid_node_template, TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base, @@ -851,6 +857,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, psn) ); +DEFINE_EVENT(/* event */ + hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + DECLARE_EVENT_CLASS(/* sender_info */ hfi1_sender_info_template, TP_PROTO(struct rvt_qp *qp), @@ -955,6 +967,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp) ); +DEFINE_EVENT(/* event */ + hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp), + TP_ARGS(qp) +); + DECLARE_EVENT_CLASS(/* tid_read_sender */ hfi1_tid_read_sender_template, TP_PROTO(struct rvt_qp *qp, char newreq), @@ -1015,6 +1033,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, newreq) ); +DEFINE_EVENT(/* event */ + hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, char newreq), + TP_ARGS(qp, newreq) +); + DECLARE_EVENT_CLASS(/* tid_rdma_request */ hfi1_tid_rdma_request_template, TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, @@ -1216,6 +1240,13 @@ DEFINE_EVENT(/* event */ ); DEFINE_EVENT(/* event */ + hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, + struct tid_rdma_request *req), + TP_ARGS(qp, newreq, opcode, psn, lpsn, req) +); + +DEFINE_EVENT(/* event */ hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write, TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, struct tid_rdma_request *req), @@ -1229,6 +1260,13 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, newreq, opcode, psn, lpsn, req) ); +DEFINE_EVENT(/* event */ + hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic, + TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, + struct tid_rdma_request *req), + TP_ARGS(qp, newreq, opcode, psn, lpsn, req) +); + DECLARE_EVENT_CLASS(/* rc_rcv_err */ hfi1_rc_rcv_err_template, TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff), diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 09eb0c9ada00..769e5e4710c6 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -588,7 +588,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo, TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), TP_ARGS(dd, ctxt, subctxt, i), TP_STRUCT__entry( - DD_DEV_ENTRY(dd); + DD_DEV_ENTRY(dd) __field(u16, ctxt) __field(u8, subctxt) __field(u8, ver_opcode) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3592a9ec155e..4da03f823474 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, u16 pageidx, 
unsigned int npages); -static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *arg, struct mmu_rb_node *node); -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, struct tid_group *grp, unsigned int start, u16 count, @@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); -static struct mmu_rb_ops tid_rb_ops = { - .insert = tid_rb_insert, - .remove = tid_rb_remove, - .invalidate = tid_rb_invalidate +static const struct mmu_interval_notifier_ops tid_mn_ops = { + .invalidate = tid_rb_invalidate, }; /* @@ -87,12 +85,8 @@ static struct mmu_rb_ops tid_rb_ops = { int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { - struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); @@ -109,20 +103,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, fd->entry_to_rb = NULL; return -ENOMEM; } - - /* - * Register MMU notifier callbacks. If the registration - * fails, continue without TID caching for this context. - */ - ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, - dd->pport->hfi1_wq, - &fd->handler); - if (ret) { - dd_dev_info(dd, - "Failed MMU notifier registration %d\n", - ret); - ret = 0; - } + fd->use_mn = true; } /* @@ -139,7 +120,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, * init. */ spin_lock(&fd->tid_lock); - if (uctxt->subctxt_cnt && fd->handler) { + if (uctxt->subctxt_cnt && fd->use_mn) { u16 remainder; fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; @@ -158,18 +139,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; - /* - * The notifier would have been removed when the process'es mm - * was freed. 
- */ - if (fd->handler) { - hfi1_mmu_rb_unregister(fd->handler); - } else { - if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) - unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); - if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) - unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - } + mutex_lock(&uctxt->exp_mutex); + if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); + if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); + mutex_unlock(&uctxt->exp_mutex); kfree(fd->invalid_tids); fd->invalid_tids = NULL; @@ -201,7 +176,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, if (mapped) { pci_unmap_single(dd->pcidev, node->dma_addr, - node->mmu.len, PCI_DMA_FROMDEVICE); + node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); pages = &node->pages[idx]; } else { pages = &tidbuf->pages[idx]; @@ -777,8 +752,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, return -EFAULT; } - node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); - node->mmu.len = npages * PAGE_SIZE; + node->fdata = fd; node->phys = page_to_phys(pages[0]); node->npages = npages; node->rcventry = rcventry; @@ -787,23 +761,35 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - if (!fd->handler) - ret = tid_rb_insert(fd, &node->mmu); - else - ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); - - if (ret) { - hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", - node->rcventry, node->mmu.addr, node->phys, ret); - pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, - PCI_DMA_FROMDEVICE); - kfree(node); - return -EFAULT; + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &node->notifier, fd->mm, + tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, + &tid_mn_ops); + if (ret) + goto out_unmap; + /* + * FIXME: This is in the wrong order, the notifier should be + * established before the pages are pinned by pin_rcv_pages. 
+ */ + mmu_interval_read_begin(&node->notifier); } + fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; + hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, - node->mmu.addr, node->phys, phys); + node->notifier.interval_tree.start, node->phys, + phys); return 0; + +out_unmap: + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", + node->rcventry, node->notifier.interval_tree.start, + node->phys, ret); + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, + PCI_DMA_FROMDEVICE); + kfree(node); + return -EFAULT; } static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, @@ -833,10 +819,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, if (grp) *grp = node->grp; - if (!fd->handler) - cacheless_tid_rb_remove(fd, node); - else - hfi1_mmu_rb_remove(fd->handler, &node->mmu); + if (fd->use_mn) + mmu_interval_notifier_remove(&node->notifier); + cacheless_tid_rb_remove(fd, node); return 0; } @@ -847,7 +832,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) struct hfi1_devdata *dd = uctxt->dd; trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, - node->npages, node->mmu.addr, node->phys, + node->npages, + node->notifier.interval_tree.start, node->phys, node->dma_addr); /* @@ -894,30 +880,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, if (!node || node->rcventry != rcventry) continue; + if (fd->use_mn) + mmu_interval_notifier_remove( + &node->notifier); cacheless_tid_rb_remove(fd, node); } } } } -/* - * Always return 0 from this function. A non-zero return indicates that the - * remove operation will be called and that memory should be unpinned. - * However, the driver cannot unpin out from under PSM. Instead, retain the - * memory (by returning 0) and inform PSM that the memory is going away. PSM - * will call back later when it has removed the memory from its list. 
- */ -static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) +static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - struct hfi1_filedata *fdata = arg; - struct hfi1_ctxtdata *uctxt = fdata->uctxt; struct tid_rb_node *node = - container_of(mnode, struct tid_rb_node, mmu); + container_of(mni, struct tid_rb_node, notifier); + struct hfi1_filedata *fdata = node->fdata; + struct hfi1_ctxtdata *uctxt = fdata->uctxt; if (node->freed) - return 0; + return true; - trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, + node->notifier.interval_tree.start, node->rcventry, node->npages, node->dma_addr); node->freed = true; @@ -946,18 +931,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) fdata->invalid_tid_idx++; } spin_unlock(&fdata->invalid_lock); - return 0; -} - -static int tid_rb_insert(void *arg, struct mmu_rb_node *node) -{ - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - u32 base = fdata->uctxt->expected_base; - - fdata->entry_to_rb[tnode->rcventry - base] = tnode; - return 0; + return true; } static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, @@ -968,12 +942,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, fdata->entry_to_rb[tnode->rcventry - base] = NULL; clear_tid_node(fdata, tnode); } - -static void tid_rb_remove(void *arg, struct mmu_rb_node *node) -{ - struct hfi1_filedata *fdata = arg; - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); - - cacheless_tid_rb_remove(fdata, tnode); -} diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 43b105de1d54..6257eee083a1 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -65,7 +65,8 @@ struct tid_user_buf { }; struct tid_rb_node { - struct mmu_rb_node mmu; + struct mmu_interval_notifier notifier; + struct hfi1_filedata *fdata; unsigned long phys; struct tid_group *grp; u32 rcventry; diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index b89a9b9aef7a..3b505006c0a6 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -106,7 +106,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np int ret; unsigned int gup_flags = FOLL_LONGTERM | (writable ? 
FOLL_WRITE : 0); - ret = get_user_pages_fast(vaddr, npages, gup_flags, pages); + ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages); if (ret < 0) return ret; @@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty) { - if (dirty) - put_user_pages_dirty_lock(p, npages); - else - put_user_pages(p, npages); + unpin_user_pages_dirty_lock(p, npages, dirty); if (mm) { /* during close after signal, mm can be NULL */ atomic64_sub(npages, &mm->pinned_vm); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fd754a16475a..c2f0d9ba93de 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 4d8510b0fc38..9972e0e6545e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -110,12 +110,6 @@ enum pkt_q_sdma_state { SDMA_PKT_Q_DEFERRED, }; -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. - */ -#define MAX_DEFER_RETRY_COUNT 1 - #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ #define SDMA_DBG(req, fmt, ...) 
\ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 646f61545ed6..089e201d7550 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -820,8 +817,8 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, + sde->dd->sdma_pad_phys, extra_bytes); bail_txadd: return ret; @@ -874,16 +871,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) - pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); - /* Update HCRC based on packet opcode */ - pbc = update_hcrc(ps->opcode, pbc); + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } tx->wqe = qp->s_wqe; ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); @@ -1030,17 +1028,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); - pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); - - /* Update HCRC based on packet opcode */ - pbc = update_hcrc(ps->opcode, pbc); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(IS_ERR_OR_NULL(pbuf))) { + if (IS_ERR_OR_NULL(pbuf)) { if (cb) verbs_pio_complete(qp, 0); if (IS_ERR(pbuf)) { @@ -1088,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..d36e3e14896d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -107,9 +107,9 @@ enum { HFI1_HAS_GRH = (1 << 0), }; -#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh)) +#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh)) #define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32)) -#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) +#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh)) #define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32)) /* 24Bits for qpn, upper 8Bits reserved */ @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t 
in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b49e60e8397d..6b14581b9965 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -78,7 +78,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (ret) goto done; - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index af1b1ffcb38e..7d90b900131b 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -102,13 +102,13 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, goto bail_txadd; for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i]; /* combine physically continuous fragments later? */ ret = sdma_txadd_page(sde->dd, &tx->txreq, skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag)); if (unlikely(ret)) goto bail_txadd; |
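The final vnic_sdma.c hunk moves from the old struct skb_frag_struct field access (frag->page_offset) to the skb_frag_t alias and its accessors. For reference, here is a kernel-style sketch (not part of the patch, not a standalone program) of the accessor-based fragment walk; the wrapper name and the vnic_txreq type are assumed for illustration, while skb_frag_page()/skb_frag_off()/skb_frag_size() and sdma_txadd_page() are the calls used in the hunk above.

/*
 * Walk the paged fragments of tx->skb with the generic skb_frag
 * accessors and hand each one to the SDMA engine.  Header/linear-data
 * handling and cleanup are omitted.
 */
static int example_add_skb_frags(struct sdma_engine *sde,
				 struct vnic_txreq *tx)
{
	int i, ret = 0;

	for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i];

		ret = sdma_txadd_page(sde->dd, &tx->txreq,
				      skb_frag_page(frag),
				      skb_frag_off(frag),
				      skb_frag_size(frag));
		if (unlikely(ret))
			break;
	}
	return ret;
}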