diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 13:04:07 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 13:04:07 -0700 |
commit | a1cdde8c411dbde19863e5104a4a1f218dd07b89 (patch) | |
tree | 518b2e6a5f8dd9f70d93afe2b063bfcfa7694621 /drivers/infiniband/sw/rdmavt | |
parent | 3a3869f1c443383ef8354ffa0e5fb8df65d8b549 (diff) | |
parent | c1191a19fecad92b73c25770a7f47174280ca564 (diff) | |
download | talos-obmc-linux-a1cdde8c411dbde19863e5104a4a1f218dd07b89.tar.gz talos-obmc-linux-a1cdde8c411dbde19863e5104a4a1f218dd07b89.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a quiet cycle for RDMA, the big bulk is the usual
smallish driver updates and bug fixes. About four new uAPI related
things. Not as many syzkaller patches this time; the bugs it finds are
getting harder to fix.
Summary:
- More work cleaning up the RDMA CM code
- Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
i40iw, iw_cxgb4, mlx5, rxe
- Driver specific resource tracking and reporting via netlink
- Continued work for name space support from Parav
- MPLS support for the verbs flow steering uAPI
- A few tricky IPoIB fixes improving robustness
- HFI1 driver support for the '16B' management packet format
- Some auditing to not print kernel pointers via %llx or similar
- Mark the entire 'UCM' user-space interface as BROKEN with the
intent to remove it entirely. The user space side of this was long
ago replaced with RDMA-CM and syzkaller is finding bugs in the
residual UCM interface nobody wishes to fix because nobody uses it.
- Purge more bogus BUG_ON's from Leon
- 'flow counters' verbs uAPI
- T10 fixups for iser/isert, these are Acked by Martin but going
through the RDMA tree due to dependencies"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
RDMA/mlx5: Update SPDX tags to show proper license
RDMA/restrack: Change SPDX tag to properly reflect license
IB/hfi1: Fix comment on default hdr entry size
IB/hfi1: Rename exp_lock to exp_mutex
IB/hfi1: Add bypass register defines and replace blind constants
IB/hfi1: Remove unused variable
IB/hfi1: Ensure VL index is within bounds
IB/hfi1: Fix user context tail allocation for DMA_RTAIL
IB/hns: Use zeroing memory allocator instead of allocator/memset
infiniband: fix a possible use-after-free bug
iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
IB/isert: use T10-PI check mask definitions from core layer
IB/iser: use T10-PI check mask definitions from core layer
RDMA/core: introduce check masks for T10-PI offload
IB/isert: fix T10-pi check mask setting
IB/mlx5: Add counters read support
IB/mlx5: Add flow counters read support
IB/mlx5: Add flow counters binding support
IB/mlx5: Add counters create and destroy support
IB/uverbs: Add support for flow counters
...
Diffstat (limited to 'drivers/infiniband/sw/rdmavt')
-rw-r--r-- | drivers/infiniband/sw/rdmavt/Kconfig | 2 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.c | 74 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.h | 6 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.c | 151 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/trace_cq.h | 35 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/vt.c | 35 |
6 files changed, 233 insertions, 70 deletions
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index 2b5513da7e83..98e798007f75 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_RDMAVT tristate "RDMA verbs transport library" - depends on 64BIT + depends on 64BIT && ARCH_DMA_ADDR_T_64BIT depends on PCI select DMA_VIRT_OPS ---help--- diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index fb52b669bfce..4f1544ad4aff 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -47,11 +47,12 @@ #include <linux/slab.h> #include <linux/vmalloc.h> -#include <linux/kthread.h> #include "cq.h" #include "vt.h" #include "trace.h" +static struct workqueue_struct *comp_vector_wq; + /** * rvt_cq_enter - add a new entry to the completion queue * @cq: completion queue @@ -124,20 +125,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) * This will cause send_complete() to be called in * another thread. 
*/ - spin_lock(&cq->rdi->n_cqs_lock); - if (likely(cq->rdi->worker)) { - cq->notify = RVT_CQ_NONE; - cq->triggered++; - kthread_queue_work(cq->rdi->worker, &cq->comptask); - } - spin_unlock(&cq->rdi->n_cqs_lock); + cq->notify = RVT_CQ_NONE; + cq->triggered++; + queue_work_on(cq->comp_vector_cpu, comp_vector_wq, + &cq->comptask); } spin_unlock_irqrestore(&cq->lock, flags); } EXPORT_SYMBOL(rvt_cq_enter); -static void send_complete(struct kthread_work *work) +static void send_complete(struct work_struct *work) { struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask); @@ -189,6 +187,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, struct ib_cq *ret; u32 sz; unsigned int entries = attr->cqe; + int comp_vector = attr->comp_vector; if (attr->flags) return ERR_PTR(-EINVAL); @@ -196,6 +195,11 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, if (entries < 1 || entries > rdi->dparms.props.max_cqe) return ERR_PTR(-EINVAL); + if (comp_vector < 0) + comp_vector = 0; + + comp_vector = comp_vector % rdi->ibdev.num_comp_vectors; + /* Allocate the completion queue structure. */ cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node); if (!cq) @@ -264,14 +268,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, * an error. 
*/ cq->rdi = rdi; + if (rdi->driver_f.comp_vect_cpu_lookup) + cq->comp_vector_cpu = + rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector); + else + cq->comp_vector_cpu = + cpumask_first(cpumask_of_node(rdi->dparms.node)); + cq->ibcq.cqe = entries; cq->notify = RVT_CQ_NONE; spin_lock_init(&cq->lock); - kthread_init_work(&cq->comptask, send_complete); + INIT_WORK(&cq->comptask, send_complete); cq->queue = wc; ret = &cq->ibcq; + trace_rvt_create_cq(cq, attr); goto done; bail_ip: @@ -297,7 +309,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; - kthread_flush_work(&cq->comptask); + flush_work(&cq->comptask); spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; spin_unlock_irq(&rdi->n_cqs_lock); @@ -507,24 +519,13 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) * * Return: 0 on success */ -int rvt_driver_cq_init(struct rvt_dev_info *rdi) +int rvt_driver_cq_init(void) { - int cpu; - struct kthread_worker *worker; - - if (rdi->worker) - return 0; - - spin_lock_init(&rdi->n_cqs_lock); - - cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - worker = kthread_create_worker_on_cpu(cpu, 0, - "%s", rdi->dparms.cq_name); - if (IS_ERR(worker)) - return PTR_ERR(worker); + comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE, + 0, "rdmavt_cq"); + if (!comp_vector_wq) + return -ENOMEM; - set_user_nice(worker->task, MIN_NICE); - rdi->worker = worker; return 0; } @@ -532,19 +533,8 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) * rvt_cq_exit - tear down cq reources * @rdi: rvt dev structure */ -void rvt_cq_exit(struct rvt_dev_info *rdi) +void rvt_cq_exit(void) { - struct kthread_worker *worker; - - /* block future queuing from send_complete() */ - spin_lock_irq(&rdi->n_cqs_lock); - worker = rdi->worker; - if (!worker) { - spin_unlock_irq(&rdi->n_cqs_lock); - return; - } - rdi->worker = NULL; - spin_unlock_irq(&rdi->n_cqs_lock); - - 
kthread_destroy_worker(worker); + destroy_workqueue(comp_vector_wq); + comp_vector_wq = NULL; } diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 6182c29eff66..72184b1c176b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -2,7 +2,7 @@ #define DEF_RVTCQ_H /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -59,6 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -int rvt_driver_cq_init(struct rvt_dev_info *rdi); -void rvt_cq_exit(struct rvt_dev_info *rdi); +int rvt_driver_cq_init(void); +void rvt_cq_exit(void); #endif /* DEF_RVTCQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c82e6bb3d77c..40046135c509 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1987,6 +1987,155 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return 0; } +/* + * Validate a RWQE and fill in the SGE state. + * Return 1 if OK. + */ +static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) +{ + int i, j, ret; + struct ib_wc wc; + struct rvt_lkey_table *rkt; + struct rvt_pd *pd; + struct rvt_sge_state *ss; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + rkt = &rdi->lkey_table; + pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); + ss = &qp->r_sge; + ss->sg_list = qp->r_sg_list; + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + ret = rvt_lkey_ok(rkt, pd, j ? 
&ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ss->total_len = qp->r_len; + return 1; + +bad_lkey: + while (j) { + struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + + rvt_put_mr(sge->mr); + } + ss->num_sge = 0; + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + /* Signal solicited completion event. */ + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + return 0; +} + +/** + * rvt_get_rwqe - copy the next RWQE into the QP's RWQE + * @qp: the QP + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge + * + * Return -1 if there is a local error, 0 if no RWQE is available, + * otherwise return 1. + * + * Can be called from interrupt level. + */ +int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) +{ + unsigned long flags; + struct rvt_rq *rq; + struct rvt_rwq *wq; + struct rvt_srq *srq; + struct rvt_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; + + if (qp->ibqp.srq) { + srq = ibsrq_to_rvtsrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; + rq = &qp->r_rq; + } + + spin_lock_irqsave(&rq->lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + if (unlikely(tail == wq->head)) { + ret = 0; + goto unlock; + } + /* Make sure entry is read after head index is read. */ + smp_rmb(); + wqe = rvt_get_rwqe_ptr(rq, tail); + /* + * Even though we update the tail index in memory, the verbs + * consumer is not supposed to post more entries until a + * completion is generated. 
+ */ + if (++tail >= rq->size) + tail = 0; + wq->tail = tail; + if (!wr_id_only && !init_sge(qp, wqe)) { + ret = -1; + goto unlock; + } + qp->r_wr_id = wqe->wr_id; + + ret = 1; + set_bit(RVT_R_WRID_VALID, &qp->r_aflags); + if (handler) { + u32 n; + + /* + * Validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; + else + n -= tail; + if (n < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } + } +unlock: + spin_unlock_irqrestore(&rq->lock, flags); +bail: + return ret; +} +EXPORT_SYMBOL(rvt_get_rwqe); + /** * qp_comm_est - handle trap with QP established * @qp: the QP @@ -2076,7 +2225,7 @@ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) to = rvt_aeth_to_usec(aeth); trace_rvt_rnrnak_add(qp, to); hrtimer_start(&qp->s_rnr_timer, - ns_to_ktime(1000 * to), HRTIMER_MODE_REL); + ns_to_ktime(1000 * to), HRTIMER_MODE_REL_PINNED); } EXPORT_SYMBOL(rvt_add_rnr_timer); diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h index a315850aa9bb..df8e1adbef9d 100644 --- a/drivers/infiniband/sw/rdmavt/trace_cq.h +++ b/drivers/infiniband/sw/rdmavt/trace_cq.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -71,6 +71,39 @@ __print_symbolic(opcode, \ wc_opcode_name(RECV), \ wc_opcode_name(RECV_RDMA_WITH_IMM)) +#define CQ_ATTR_PRINT \ +"[%s] user cq %s cqe %u comp_vector %d comp_vector_cpu %d flags %x" + +DECLARE_EVENT_CLASS(rvt_cq_template, + TP_PROTO(struct rvt_cq *cq, + const struct ib_cq_init_attr *attr), + TP_ARGS(cq, attr), + TP_STRUCT__entry(RDI_DEV_ENTRY(cq->rdi) + __field(struct rvt_mmap_info *, ip) + __field(unsigned int, cqe) + __field(int, comp_vector) + __field(int, comp_vector_cpu) + __field(u32, flags) + ), + TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi) + __entry->ip = cq->ip; + __entry->cqe = attr->cqe; + __entry->comp_vector = attr->comp_vector; + __entry->comp_vector_cpu = + cq->comp_vector_cpu; + __entry->flags = attr->flags; + ), + TP_printk(CQ_ATTR_PRINT, __get_str(dev), + __entry->ip ? "true" : "false", __entry->cqe, + __entry->comp_vector, __entry->comp_vector_cpu, + __entry->flags + ) +); + +DEFINE_EVENT(rvt_cq_template, rvt_create_cq, + TP_PROTO(struct rvt_cq *cq, const struct ib_cq_init_attr *attr), + TP_ARGS(cq, attr)); + #define CQ_PRN \ "[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x" diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 434199d0bc96..17e4abc067af 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -49,6 +49,7 @@ #include <linux/kernel.h> #include <linux/dma-mapping.h> #include "vt.h" +#include "cq.h" #include "trace.h" #define RVT_UVERBS_ABI_VERSION 2 @@ -58,21 +59,18 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library"); static int rvt_init(void) { - /* - * rdmavt does not need to do anything special when it starts up. All it - * needs to do is sit and wait until a driver attempts registration. 
- */ - return 0; + int ret = rvt_driver_cq_init(); + + if (ret) + pr_err("Error in driver CQ init.\n"); + + return ret; } module_init(rvt_init); static void rvt_cleanup(void) { - /* - * Nothing to do at exit time either. The module won't be able to be - * removed until all drivers are gone which means all the dev structs - * are gone so there is really nothing to do. - */ + rvt_cq_exit(); } module_exit(rvt_cleanup); @@ -777,11 +775,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) } /* Completion queues */ - ret = rvt_driver_cq_init(rdi); - if (ret) { - pr_err("Error in driver CQ init.\n"); - goto bail_mr; - } + spin_lock_init(&rdi->n_cqs_lock); /* DMA Operations */ rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops; @@ -829,14 +823,15 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; - rdi->ibdev.num_comp_vectors = 1; + if (!rdi->ibdev.num_comp_vectors) + rdi->ibdev.num_comp_vectors = 1; rdi->ibdev.driver_id = driver_id; /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); - goto bail_cq; + goto bail_mr; } rvt_create_mad_agents(rdi); @@ -844,9 +839,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; -bail_cq: - rvt_cq_exit(rdi); - bail_mr: rvt_mr_exit(rdi); @@ -870,7 +862,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) rvt_free_mad_agents(rdi); ib_unregister_device(&rdi->ibdev); - rvt_cq_exit(rdi); rvt_mr_exit(rdi); rvt_qp_exit(rdi); } |