diff options
Diffstat (limited to 'drivers/infiniband/sw')
29 files changed, 585 insertions, 536 deletions
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fe99da0ff060..ee02c6176007 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * @udata: user data or NULL for kernel object * * Return: 0 on success */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a85571a4cf57..13d7f66eadab 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -552,7 +552,6 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /** * rvt_driver_cq_init - Init cq resources on behalf of driver - * @rdi: rvt dev structure * * Return: 0 on success */ @@ -568,7 +567,6 @@ int rvt_driver_cq_init(void) /** * rvt_cq_exit - tear down cq reources - * @rdi: rvt dev structure */ void rvt_cq_exit(void) { diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index a6a39f01dca3..72f6534fbb52 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -390,7 +390,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(pd->device, start, length, mr_access_flags); if (IS_ERR(umem)) return (void *)umem; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0b0a241c57ff..7858d499db03 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -61,6 +61,8 @@ #define RVT_RWQ_COUNT_THRESHOLD 16 static void rvt_rc_timeout(struct timer_list *t); +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); /* * Convert the AETH RNR timeout code into the number of microseconds. @@ -452,40 +454,41 @@ no_qp_table: } /** - * free_all_qps - check for QPs still in use + * rvt_free_qp_cb - callback function to reset a qp + * @qp: the qp to reset + * @v: a 64-bit value + * + * This function resets the qp and removes it from the + * qp hash table. + */ +static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v) +{ + unsigned int *qp_inuse = (unsigned int *)v; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* Reset the qp and remove it from the qp hash list */ + rvt_reset_qp(rdi, qp, qp->ibqp.qp_type); + + /* Increment the qp_inuse count */ + (*qp_inuse)++; +} + +/** + * rvt_free_all_qps - check for QPs still in use * @rdi: rvt device info structure * * There should not be any QPs still in use. * Free memory for table. + * Return the number of QPs still in use. */ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { - unsigned long flags; - struct rvt_qp *qp; - unsigned n, qp_inuse = 0; - spinlock_t *ql; /* work around too long line below */ - - if (rdi->driver_f.free_all_qps) - qp_inuse = rdi->driver_f.free_all_qps(rdi); + unsigned int qp_inuse = 0; qp_inuse += rvt_mcast_tree_empty(rdi); - if (!rdi->qp_dev) - return qp_inuse; - - ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(ql, flags); - for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { - qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], - lockdep_is_held(ql)); - RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); + rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb); - for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql))) - qp_inuse++; - } - spin_unlock_irqrestore(ql, flags); - synchronize_rcu(); return qp_inuse; } @@ -902,14 +905,14 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** - * rvt_reset_qp - initialize the QP state to the reset state + * _rvt_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * * r_lock, s_hlock, and s_lock are required to be held by the caller */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) __must_hold(&qp->s_lock) __must_hold(&qp->s_hlock) __must_hold(&qp->r_lock) @@ -955,6 +958,27 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, lockdep_assert_held(&qp->s_lock); } +/** + * rvt_reset_qp - initialize the QP state to the reset state + * @rdi: the device info + * @qp: the QP to reset + * @type: the QP type + * + * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock + * before calling _rvt_reset_qp(). + */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + _rvt_reset_qp(rdi, qp, type); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); +} + /** rvt_free_qpn - Free a qpn from the bit map * @qpt: QP table * @qpn: queue pair number to free @@ -1546,7 +1570,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: if (qp->state != IB_QPS_RESET) - rvt_reset_qp(rdi, qp, ibqp->qp_type); + _rvt_reset_qp(rdi, qp, ibqp->qp_type); break; case IB_QPS_RTR: @@ -1695,13 +1719,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); wait_event(qp->wait, !atomic_read(&qp->refcount)); /* qpn is now available for use again */ @@ -2563,10 +2581,9 @@ void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift) EXPORT_SYMBOL(rvt_add_retry_timer_ext); /** - * rvt_add_rnr_timer - add/start an rnr timer - * @qp - the QP - * @aeth - aeth of RNR timeout, simulated aeth for loopback - * add an rnr timer on the QP + * rvt_add_rnr_timer - add/start an rnr timer on the QP + * @qp: the QP + * @aeth: aeth of RNR timeout, simulated aeth for loopback */ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) { @@ -2583,7 +2600,7 @@ EXPORT_SYMBOL(rvt_add_rnr_timer); /** * rvt_stop_rc_timers - stop all timers - * @qp - the QP + * @qp: the QP * stop any pending timers */ void rvt_stop_rc_timers(struct rvt_qp *qp) @@ -2617,7 +2634,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) /** * rvt_del_timers_sync - wait for any timeout routines to exit - * @qp - the QP + * @qp: the QP */ void rvt_del_timers_sync(struct rvt_qp *qp) { @@ -2626,7 +2643,7 @@ void rvt_del_timers_sync(struct rvt_qp *qp) } EXPORT_SYMBOL(rvt_del_timers_sync); -/** +/* * This is called from s_timer for missing responses. */ static void rvt_rc_timeout(struct timer_list *t) @@ -2676,12 +2693,13 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry); * rvt_qp_iter_init - initial for QP iteration * @rdi: rvt devinfo * @v: u64 value + * @cb: user-defined callback * * This returns an iterator suitable for iterating QPs * in the system. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * @cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * @@ -2712,7 +2730,7 @@ EXPORT_SYMBOL(rvt_qp_iter_init); /** * rvt_qp_iter_next - return the next QP in iter - * @iter - the iterator + * @iter: the iterator * * Fine grained QP iterator suitable for use * with debugfs seq_file mechanisms. @@ -2775,14 +2793,14 @@ EXPORT_SYMBOL(rvt_qp_iter_next); /** * rvt_qp_iter - iterate all QPs - * @rdi - rvt devinfo - * @v - a 64 bit value - * @cb - a callback + * @rdi: rvt devinfo + * @v: a 64-bit value + * @cb: a callback * * This provides a way for iterating all QPs. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 890d7b760d2e..977906cc0d11 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -195,7 +195,14 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth) } EXPORT_SYMBOL(rvt_get_credit); -/* rvt_restart_sge - rewind the sge state for a wqe */ +/** + * rvt_restart_sge - rewind the sge state for a wqe + * @ss: the sge state pointer + * @wqe: the wqe to rewind + * @len: the data length from the start of the wqe in bytes + * + * Returns the remaining data length. + */ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len) { ss->sge = wqe->sg_list[0]; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18da1e1ea979..986265ad6e79 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -683,9 +683,10 @@ EXPORT_SYMBOL(rvt_unregister_device); /** * rvt_init_port - init internal data for driver port - * @rdi: rvt dev strut + * @rdi: rvt_dev_info struct * @port: rvt port * @port_index: 0 based index of ports, different from IB core port num + * @pkey_table: pkey_table for @port * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index a8c11b5e1e94..0946a301a5c5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -77,12 +77,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; - rxe->attr.fw_ver = RXE_FW_VER; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; - rxe->attr.vendor_id = RXE_VENDOR_ID; - rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; - rxe->attr.hw_ver = RXE_HW_VER; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; @@ -94,22 +90,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; - rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; - rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; - rxe->attr.max_ee = RXE_MAX_EE; - rxe->attr.max_rdd = RXE_MAX_RDD; - rxe->attr.max_mw = RXE_MAX_MW; - rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; - rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; rxe->attr.max_ah = RXE_MAX_AH; - rxe->attr.max_fmr = RXE_MAX_FMR; - rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index ecf6e659c0da..fb07eed9e402 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -65,10 +65,6 @@ */ #define RXE_UVERBS_ABI_VERSION 2 -#define RDMA_LINK_PHYS_STATE_LINK_UP (5) -#define RDMA_LINK_PHYS_STATE_DISABLED (3) -#define RDMA_LINK_PHYS_STATE_POLLING (2) - #define RXE_ROCE_V2_SPORT (0xc000) static inline u32 rxe_crc32(struct rxe_dev *rxe, diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 116cafc9afcf..4bc88708b355 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -329,7 +329,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } return COMPST_ERROR_RETRY; @@ -463,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -479,7 +479,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -725,7 +725,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } if (pkt) { diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ea6a819b7167..e83c7b518bfa 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -169,7 +169,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, void *vaddr; int err; - umem = ib_umem_get(udata, start, length, access, 0); + umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { pr_warn("err %d from rxe_umem_get\n", (int)PTR_ERR(umem)); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 5a3474f9351b..312c2fc961c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -117,10 +117,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; - if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk), - recv_sockets.sk6->sk, &ndst, &fl6))) { + ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), + recv_sockets.sk6->sk, &fl6, + NULL); + if (unlikely(IS_ERR(ndst))) { pr_err_ratelimited("no route to %pI6\n", daddr); - goto put; + return NULL; } if (unlikely(ndst->error)) { diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 1abed47ca221..f59616b02477 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -34,6 +34,8 @@ #ifndef RXE_PARAM_H #define RXE_PARAM_H +#include <uapi/rdma/rdma_user_rxe.h> + static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) { if (mtu < 256) @@ -60,15 +62,10 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) /* default/initial rxe device parameter settings */ enum rxe_device_param { - RXE_FW_VER = 0, RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_VENDOR_ID = 0, - RXE_VENDOR_PART_ID = 0, - RXE_HW_VER = 0, RXE_MAX_QP = 0x10000, RXE_MAX_QP_WR = 0x4000, - RXE_MAX_INLINE_DATA = 400, RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_AUTO_PATH_MIG @@ -81,27 +78,22 @@ enum rxe_device_param { | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG, RXE_MAX_SGE = 32, + RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + + sizeof(struct ib_sge) * RXE_MAX_SGE, + RXE_MAX_INLINE_DATA = RXE_MAX_WQE_SIZE - + sizeof(struct rxe_send_wqe), RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, - RXE_MAX_EE_RD_ATOM = 0, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, - RXE_MAX_EE_INIT_RD_ATOM = 0, - RXE_MAX_EE = 0, - RXE_MAX_RDD = 0, - RXE_MAX_MW = 0, - RXE_MAX_RAW_IPV6_QP = 0, - RXE_MAX_RAW_ETHY_QP = 0, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, RXE_MAX_AH = 100, - RXE_MAX_FMR = 0, - RXE_MAX_MAP_PER_FMR = 0, RXE_MAX_SRQ = 960, RXE_MAX_SRQ_WR = 0x4000, RXE_MIN_SRQ_WR = 1, @@ -154,7 +146,7 @@ enum rxe_port_param { RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, RXE_PORT_ACTIVE_SPEED = 1, RXE_PORT_PKEY_TBL_LEN = 64, - RXE_PORT_PHYS_STATE = 2, + RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, }; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index e2c6d1cedf41..ec21f616ac98 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -237,19 +237,17 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, */ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32_generic(qp_num(qp), 14) & 0x3fff); - qp->sq.max_wr = init->cap.max_send_wr; - qp->sq.max_sge = init->cap.max_send_sge; - qp->sq.max_inline = init->cap.max_inline_data; - wqe_size = max_t(int, sizeof(struct rxe_send_wqe) + - qp->sq.max_sge * sizeof(struct ib_sge), - sizeof(struct rxe_send_wqe) + - qp->sq.max_inline); + /* These caps are limited by rxe_qp_chk_cap() done by the caller */ + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = init->cap.max_send_sge = + wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = init->cap.max_inline_data = wqe_size; + wqe_size += sizeof(struct rxe_send_wqe); - qp->sq.queue = rxe_queue_init(rxe, - &qp->sq.max_wr, - wqe_size); + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size); if (!qp->sq.queue) return -ENOMEM; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f9a492ed900b..831ad578a7b2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -389,7 +389,7 @@ void rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index c5d9b558fa90..e5031172c019 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1cbfbd98eb22..c4a8195bf670 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4ebdfcf4d33e..9dd4bd7aea92 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -69,11 +69,11 @@ static int rxe_query_port(struct ib_device *dev, &attr->active_width); if (attr->state == IB_PORT_ACTIVE) - attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP; + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else if (dev_get_flags(rxe->ndev) & IFF_UP) - attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING; + attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else - attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED; + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; mutex_unlock(&rxe->usdev_lock); @@ -106,6 +106,10 @@ static int rxe_modify_device(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) + return -EOPNOTSUPP; + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); @@ -1171,6 +1175,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dev->dev.dma_parms = &rxe->dma_parms; + rxe->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5c4b2239129c..92de39c4a7c1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -384,6 +384,7 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; + struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; @@ -407,7 +408,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 77b1aabf6ff3..af5e9f8c0fcd 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -7,6 +7,7 @@ #define _SIW_H #include <rdma/ib_verbs.h> +#include <rdma/restrack.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <crypto/hash.h> @@ -70,6 +71,7 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; + struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; @@ -98,18 +100,9 @@ struct siw_device { struct work_struct netdev_down; }; -struct siw_uobj { - void *addr; - u32 size; -}; - struct siw_ucontext { struct ib_ucontext base_ucontext; struct siw_device *sdev; - - /* xarray of user mappable objects */ - struct xarray xa; - u32 uobj_nextkey; }; /* @@ -138,9 +131,9 @@ struct siw_umem { }; struct siw_pble { - u64 addr; /* Address of assigned user buffer */ - u64 size; /* Size of this entry */ - u64 pbl_off; /* Total offset from start of PBL */ + dma_addr_t addr; /* Address of assigned buffer */ + unsigned int size; /* Size of this entry */ + unsigned long pbl_off; /* Total offset from start of PBL */ }; struct siw_pbl { @@ -149,8 +142,6 @@ struct siw_pbl { struct siw_pble pbe[1]; }; -struct siw_mr; - /* * Generic memory representation for registered siw memory. * Memory lookup always via higher 24 bit of STag (STag index). @@ -219,8 +210,7 @@ struct siw_cq { u32 cq_put; u32 cq_get; u32 num_cqe; - bool kernel_verbs; - u32 xa_cq_index; /* mmap information for CQE array */ + struct rdma_user_mmap_entry *cq_entry; /* mmap info for CQE array */ u32 id; /* For debugging only */ }; @@ -263,9 +253,9 @@ struct siw_srq { u32 rq_put; u32 rq_get; u32 num_rqe; /* max # of wqe's allowed */ - u32 xa_srq_index; /* mmap information for SRQ array */ - char armed; /* inform user if limit hit */ - char kernel_verbs; /* '1' if kernel client */ + struct rdma_user_mmap_entry *srq_entry; /* mmap info for SRQ array */ + bool armed:1; /* inform user if limit hit */ + bool is_kernel_res:1; /* true if kernel client */ }; struct siw_qp_attrs { @@ -428,13 +418,11 @@ struct siw_iwarp_tx { }; struct siw_qp { + struct ib_qp base_qp; struct siw_device *sdev; - struct ib_qp *ib_qp; struct kref ref; - u32 qp_num; struct list_head devq; int tx_cpu; - bool kernel_verbs; struct siw_qp_attrs attrs; struct siw_cep *cep; @@ -477,16 +465,11 @@ struct siw_qp { u8 layer : 4, etype : 4; u8 ecode; } term_info; - u32 xa_sq_index; /* mmap information for SQE array */ - u32 xa_rq_index; /* mmap information for RQE array */ + struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ + struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ struct rcu_head rcu; }; -struct siw_base_qp { - struct ib_qp base_qp; - struct siw_qp *qp; -}; - /* helper macros */ #define rx_qp(rx) container_of(rx, struct siw_qp, rx_stream) #define tx_qp(tx) container_of(tx, struct siw_qp, tx_ctx) @@ -503,6 +486,11 @@ struct iwarp_msg_info { int (*rx_data)(struct siw_qp *qp); }; +struct siw_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + void *address; +}; + /* Global siw parameters. Currently set in siw_main.c */ extern const bool zcopy_tx; extern const bool try_gso; @@ -577,14 +565,9 @@ static inline struct siw_ucontext *to_siw_ctx(struct ib_ucontext *base_ctx) return container_of(base_ctx, struct siw_ucontext, base_ucontext); } -static inline struct siw_base_qp *to_siw_base_qp(struct ib_qp *base_qp) -{ - return container_of(base_qp, struct siw_base_qp, base_qp); -} - static inline struct siw_qp *to_siw_qp(struct ib_qp *base_qp) { - return to_siw_base_qp(base_qp)->qp; + return container_of(base_qp, struct siw_qp, base_qp); } static inline struct siw_cq *to_siw_cq(struct ib_cq *base_cq) @@ -607,6 +590,12 @@ static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) return container_of(base_mr, struct siw_mr, base_mr); } +static inline struct siw_user_mmap_entry * +to_siw_mmap_entry(struct rdma_user_mmap_entry *rdma_mmap) +{ + return container_of(rdma_mmap, struct siw_user_mmap_entry, rdma_entry); +} + static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id) { struct siw_qp *qp; @@ -623,7 +612,7 @@ static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id) static inline u32 qp_id(struct siw_qp *qp) { - return qp->qp_num; + return qp->base_qp.qp_num; } static inline void siw_qp_get(struct siw_qp *qp) @@ -734,7 +723,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__) #define siw_dbg_cep(cep, fmt, ...) \ - ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \ + ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \ cep, __func__, ##__VA_ARGS__) void siw_cq_flush(struct siw_cq *cq); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 9ce8a1b925d2..c5651a96b196 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -29,7 +29,7 @@ * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR */ static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR; -static const bool relaxed_ird_negotiation = 1; +static const bool relaxed_ird_negotiation = true; static void siw_cm_llp_state_change(struct sock *s); static void siw_cm_llp_data_ready(struct sock *s); @@ -355,8 +355,8 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, getname_local(cep->sock, &event.local_addr); getname_peer(cep->sock, &event.remote_addr); } - siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n", - cep->qp ? qp_id(cep->qp) : -1, id, reason, status); + siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n", + cep->qp ? qp_id(cep->qp) : UINT_MAX, reason, status); return id->event_handler(id, &event); } @@ -947,8 +947,6 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s); - if (siw_tcp_nagle == false) { int val = 1; @@ -1011,7 +1009,8 @@ static void siw_cm_work_handler(struct work_struct *w) cep = work->cep; siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n", - cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state); + cep->qp ? qp_id(cep->qp) : UINT_MAX, + work->type, cep->state); siw_cep_set_inuse(cep); @@ -1145,9 +1144,9 @@ static void siw_cm_work_handler(struct work_struct *w) } if (release_cep) { siw_dbg_cep(cep, - "release: timer=%s, QP[%u], id 0x%p\n", + "release: timer=%s, QP[%u]\n", cep->mpa_timer ? "y" : "n", - cep->qp ? qp_id(cep->qp) : -1, cep->cm_id); + cep->qp ? qp_id(cep->qp) : UINT_MAX); siw_cancel_mpatimer(cep); @@ -1211,8 +1210,8 @@ int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) else delay = MPAREP_TIMEOUT; } - siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n", - cep->qp ? qp_id(cep->qp) : -1, type, work, delay); + siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n", + cep->qp ? qp_id(cep->qp) : -1, type, delay); queue_delayed_work(siw_cm_wq, &work->work, delay); @@ -1226,10 +1225,9 @@ static void siw_cm_llp_data_ready(struct sock *sk) read_lock(&sk->sk_callback_lock); cep = sk_to_cep(sk); - if (!cep) { - WARN_ON(1); + if (!cep) goto out; - } + siw_dbg_cep(cep, "state: %d\n", cep->state); switch (cep->state) { @@ -1374,22 +1372,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) rv = -EINVAL; goto error; } - if (v4) - siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - id, pd_len, - &((struct sockaddr_in *)(laddr))->sin_addr, - ntohs(((struct sockaddr_in *)(laddr))->sin_port), - &((struct sockaddr_in *)(raddr))->sin_addr, - ntohs(((struct sockaddr_in *)(raddr))->sin_port)); - else - siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - id, pd_len, - &((struct sockaddr_in6 *)(laddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), - &((struct sockaddr_in6 *)(raddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port)); + siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr, + raddr); rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) @@ -1508,14 +1492,13 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv >= 0) { rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT); if (!rv) { - siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id, - qp_id(qp)); + siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp)); siw_cep_set_free(cep); return 0; } } error: - siw_dbg_qp(qp, "failed: %d\n", rv); + siw_dbg(id->device, "failed: %d\n", rv); if (cep) { siw_socket_disassoc(s); @@ -1540,7 +1523,8 @@ error: } else if (s) { sock_release(s); } - siw_qp_put(qp); + if (qp) + siw_qp_put(qp); return rv; } @@ -1580,7 +1564,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); @@ -1601,7 +1585,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) up_write(&qp->state_lock); goto error; } - siw_dbg_cep(cep, "id 0x%p\n", id); + siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { siw_dbg_cep(cep, "peer allows GSO on TX\n"); @@ -1611,8 +1595,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird > sdev->attrs.max_ird) { siw_dbg_cep( cep, - "id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n", - id, qp_id(qp), params->ord, sdev->attrs.max_ord, + "[QP %u]: ord %d (max %d), ird %d (max %d)\n", + qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); rv = -EINVAL; up_write(&qp->state_lock); @@ -1624,8 +1608,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (params->private_data_len > max_priv_data) { siw_dbg_cep( cep, - "id 0x%p, [QP %u]: private data length: %d (max %d)\n", - id, qp_id(qp), params->private_data_len, max_priv_data); + "[QP %u]: private data length: %d (max %d)\n", + qp_id(qp), params->private_data_len, max_priv_data); rv = -EINVAL; up_write(&qp->state_lock); goto error; @@ -1679,7 +1663,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) qp_attrs.flags = SIW_MPA_CRC; qp_attrs.state = SIW_QP_STATE_RTS; - siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp)); + siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp)); /* Associate QP with CEP */ siw_cep_get(cep); @@ -1700,8 +1684,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv) goto error; - siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n", - id, qp_id(qp), params->private_data_len); + siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n", + qp_id(qp), params->private_data_len); rv = siw_send_mpareqrep(cep, params->private_data, params->private_data_len); @@ -1759,14 +1743,14 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); /* put last reference */ return -ECONNRESET; } - siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state, + siw_dbg_cep(cep, "cep->state %d, pd_len %d\n", cep->state, pd_len); if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) { @@ -1804,14 +1788,14 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, sizeof(s_val)); if (rv) { - siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv); + siw_dbg(id->device, "setsockopt error: %d\n", rv); goto error; } rv = s->ops->bind(s, laddr, addr_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (rv) { - siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv); + siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; } cep = siw_cep_alloc(sdev); @@ -1824,13 +1808,13 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = siw_cm_alloc_work(cep, backlog); if (rv) { siw_dbg(id->device, - "id 0x%p: alloc_work error %d, backlog %d\n", id, + "alloc_work error %d, backlog %d\n", rv, backlog); goto error; } rv = s->ops->listen(s, backlog); if (rv) { - siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv); + siw_dbg(id->device, "listen error %d\n", rv); goto error; } cep->cm_id = id; @@ -1868,14 +1852,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - if (addr_family == AF_INET) - siw_dbg(id->device, "Listen at laddr %pI4 %u\n", - &(((struct sockaddr_in *)laddr)->sin_addr), - ((struct sockaddr_in *)laddr)->sin_port); - else - siw_dbg(id->device, "Listen at laddr %pI6 %u\n", - &(((struct sockaddr_in6 *)laddr)->sin6_addr), - ((struct sockaddr_in6 *)laddr)->sin6_port); + siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); return 0; @@ -1914,8 +1891,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) list_del(p); - siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id, - cep->state); + siw_dbg_cep(cep, "drop cep, state %d\n", cep->state); siw_cep_set_inuse(cep); @@ -1937,7 +1913,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) /* * siw_create_listen - Create resources for a listener's IWCM ID @id * - * Listens on the socket addresses id->local_addr and id->remote_addr. + * Listens on the socket address id->local_addr. * * If the listener's @id provides a specific local IP address, at most one * listening socket is created and associated with @id. @@ -1952,7 +1928,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct net_device *dev = to_siw_dev(id->device)->netdev; int rv = 0, listeners = 0; - siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog); + siw_dbg(id->device, "backlog %d\n", backlog); /* * For each attached address of the interface, create a @@ -1961,16 +1937,16 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) */ if (id->local_addr.ss_family == AF_INET) { struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr, *s_raddr; + struct sockaddr_in s_laddr; const struct in_ifaddr *ifa; + if (!in_dev) { + rv = -ENODEV; + goto out; + } memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - s_raddr = (struct sockaddr_in *)&id->remote_addr; - siw_dbg(id->device, - "id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n", - id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port), - &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1990,25 +1966,26 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } else if (id->local_addr.ss_family == AF_INET6) { struct inet6_dev *in6_dev = in6_dev_get(dev); struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), - *s_raddr = &to_sockaddr_in6(id->remote_addr); + struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); - siw_dbg(id->device, - "id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n", - id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), - &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); + if (!in6_dev) { + rv = -ENODEV; + goto out; + } + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); - read_lock_bh(&in6_dev->lock); + rtnl_lock(); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - struct sockaddr_in6 bind_addr; - + if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) + continue; if (ipv6_addr_any(&s_laddr->sin6_addr) || ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { - bind_addr.sin6_family = AF_INET6; - bind_addr.sin6_port = s_laddr->sin6_port; - bind_addr.sin6_flowinfo = 0; - bind_addr.sin6_addr = ifp->addr; - bind_addr.sin6_scope_id = dev->ifindex; + struct sockaddr_in6 bind_addr = { + .sin6_family = AF_INET6, + .sin6_port = s_laddr->sin6_port, + .sin6_flowinfo = 0, + .sin6_addr = ifp->addr, + .sin6_scope_id = dev->ifindex }; rv = siw_listen_address(id, backlog, (struct sockaddr *)&bind_addr, @@ -2017,28 +1994,26 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) listeners++; } } - read_unlock_bh(&in6_dev->lock); - + rtnl_unlock(); in6_dev_put(in6_dev); } else { - return -EAFNOSUPPORT; + rv = -EAFNOSUPPORT; } +out: if (listeners) rv = 0; else if (!rv) rv = -EINVAL; - siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK"); + siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK"); return rv; } int siw_destroy_listen(struct iw_cm_id *id) { - siw_dbg(id->device, "id 0x%p\n", id); - if (!id->provider_data) { - siw_dbg(id->device, "id 0x%p: no cep(s)\n", id); + siw_dbg(id->device, "no cep(s)\n"); return 0; } siw_drop_listeners(id); diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index e381ae9b7d62..d68e37859e73 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -65,15 +65,16 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) * reaped here, which do not hold a QP reference * and do not qualify for memory extension verbs. */ - if (likely(cq->kernel_verbs)) { + if (likely(rdma_is_kernel_res(&cq->base_cq.res))) { if (cqe->flags & SIW_WQE_REM_INVAL) { wc->ex.invalidate_rkey = cqe->inval_stag; wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; - siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", + siw_dbg_cq(cq, + "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, - cqe->flags, (void *)cqe->id); + cqe->flags, (void *)(uintptr_t)cqe->id); } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 05a92f997f60..96ed349c0939 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/dma-mapping.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/rdma_netlink.h> @@ -243,29 +244,11 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id) * siw_qp_id2obj() increments object reference count */ siw_qp_put(qp); - return qp->ib_qp; + return &qp->base_qp; } return NULL; } -static void siw_verbs_sq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_sq_flush(qp); - up_write(&qp->state_lock); -} - -static void siw_verbs_rq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_rq_flush(qp); - up_write(&qp->state_lock); -} - static const struct ib_device_ops siw_device_ops = { .owner = THIS_MODULE, .uverbs_abi_ver = SIW_ABI_VERSION, @@ -284,8 +267,6 @@ static const struct ib_device_ops siw_device_ops = { .destroy_cq = siw_destroy_cq, .destroy_qp = siw_destroy_qp, .destroy_srq = siw_destroy_srq, - .drain_rq = siw_verbs_rq_flush, - .drain_sq = siw_verbs_sq_flush, .get_dma_mr = siw_get_dma_mr, .get_port_immutable = siw_get_port_immutable, .iw_accept = siw_accept, @@ -298,6 +279,7 @@ static const struct ib_device_ops siw_device_ops = { .iw_rem_ref = siw_qp_put_ref, .map_mr_sg = siw_map_mr_sg, .mmap = siw_mmap, + .mmap_free = siw_mmap_free, .modify_qp = siw_verbs_modify_qp, .modify_srq = siw_modify_srq, .poll_cq = siw_poll_cq, @@ -350,15 +332,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; if (netdev->type != ARPHRD_LOOPBACK) { - memcpy(&base_dev->node_guid, netdev->dev_addr, 6); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + netdev->dev_addr); } else { /* * The loopback device does not have a HW address, * but connection mangagement lib expects gid != 0 */ - size_t gidlen = min_t(size_t, strlen(base_dev->name), 6); + size_t len = min_t(size_t, strlen(base_dev->name), 6); + char addr[6] = { }; - memcpy(&base_dev->node_guid, base_dev->name, gidlen); + memcpy(addr, base_dev->name, len); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + addr); } base_dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -397,6 +383,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; base_dev->dev.dma_ops = &dma_virt_ops; + base_dev->dev.dma_parms = &sdev->dma_parms; + sdev->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); ib_set_device_ops(base_dev, &siw_device_ops); diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index 67171c82b0c4..e2061dc0b043 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -63,15 +63,7 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages, bool dirty) { - struct page **p = chunk->plist; - - while (num_pages--) { - if (!PageDirty(*p) && dirty) - put_user_pages_dirty_lock(p, 1); - else - put_user_page(*p); - p++; - } + unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty); } void siw_umem_release(struct siw_umem *umem, bool dirty) @@ -197,12 +189,12 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, */ if (addr < mem->va || addr + len > mem->va + mem->len) { siw_dbg_pd(pd, "MEM interval len %d\n", len); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] out of bounds\n", - (unsigned long long)addr, - (unsigned long long)(addr + len)); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] STag=0x%08x\n", - (unsigned long long)mem->va, - (unsigned long long)(mem->va + mem->len), + siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n", + (void *)(uintptr_t)addr, + (void *)(uintptr_t)(addr + len)); + siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n", + (void *)(uintptr_t)mem->va, + (void *)(uintptr_t)(mem->va + mem->len), mem->stag); return -E_BASE_BOUNDS; @@ -330,7 +322,7 @@ out: * Optionally, provides remaining len within current element, and * current PBL index for later resume at same element. */ -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) { int i = idx ? *idx : 0; @@ -434,7 +426,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) while (nents) { struct page **plist = &umem->page_chunk[i].plist[got]; - rv = get_user_pages(first_page_va, nents, + rv = pin_user_pages(first_page_va, nents, foll_flags | FOLL_LONGTERM, plist, NULL); if (rv < 0) diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index f43daf280891..db138c8423da 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -9,7 +9,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); void siw_umem_release(struct siw_umem *umem, bool dirty); struct siw_pbl *siw_pbl_alloc(u32 num_buf); -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); int siw_mem_add(struct siw_device *sdev, struct siw_mem *m); int siw_invalidate_stag(struct ib_pd *pd, u32 stag); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 0990307c5d2c..875d36d4b1c6 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) @@ -949,7 +956,7 @@ skip_irq: rv = -EINVAL; goto out; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; wqe->sqe.sge[0].lkey = 0; wqe->sqe.num_sge = 1; } @@ -1063,8 +1070,8 @@ int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes, cqe->imm_data = 0; cqe->bytes = bytes; - if (cq->kernel_verbs) - cqe->base_qp = qp->ib_qp; + if (rdma_is_kernel_res(&cq->base_cq.res)) + cqe->base_qp = &qp->base_qp; else cqe->qp_id = qp_id(qp); @@ -1121,8 +1128,8 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, cqe->imm_data = 0; cqe->bytes = bytes; - if (cq->kernel_verbs) { - cqe->base_qp = qp->ib_qp; + if (rdma_is_kernel_res(&cq->base_cq.res)) { + cqe->base_qp = &qp->base_qp; if (inval_stag) { cqe_flags |= SIW_WQE_REM_INVAL; cqe->inval_stag = inval_stag; @@ -1290,13 +1297,12 @@ void siw_rq_flush(struct siw_qp *qp) int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) { - int rv = xa_alloc(&sdev->qp_xa, &qp->ib_qp->qp_num, qp, xa_limit_32b, + int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b, GFP_KERNEL); if (!rv) { kref_init(&qp->ref); qp->sdev = sdev; - qp->qp_num = qp->ib_qp->qp_num; siw_dbg_qp(qp, "new QP\n"); } return rv; diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index f87657a11657..9ccce2909ac4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -38,9 +38,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, p = siw_get_upage(umem, dest_addr); if (unlikely(!p)) { - pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", + pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n", __func__, qp_id(rx_qp(srx)), - (void *)dest_addr, (void *)umem->fp_addr); + (void *)(uintptr_t)dest_addr, + (void *)(uintptr_t)umem->fp_addr); /* siw internal error */ srx->skb_copied += copied; srx->skb_new -= copied; @@ -50,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, pg_off = dest_addr & ~PAGE_MASK; bytes = min(len, (int)PAGE_SIZE - pg_off); - siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); + siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes); dest = kmap_atomic(p); rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, @@ -67,7 +68,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, return -EFAULT; } if (srx->mpa_crc_hd) { - if (rx_qp(srx)->kernel_verbs) { + if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) { crypto_shash_update(srx->mpa_crc_hd, (u8 *)(dest + pg_off), bytes); kunmap_atomic(dest); @@ -104,11 +105,11 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) { int rv; - siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); + siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len); rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); if (unlikely(rv)) { - pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", + pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n", qp_id(rx_qp(srx)), __func__, len, kva, rv); return rv; @@ -132,7 +133,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, while (len) { int bytes; - u64 buf_addr = + dma_addr_t buf_addr = siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); if (!buf_addr) break; @@ -387,7 +388,7 @@ static struct siw_wqe *siw_rqe_get(struct siw_qp *qp) struct siw_rqe *rqe2 = &srq->recvq[off]; if (!(rqe2->flags & SIW_WQE_VALID)) { - srq->armed = 0; + srq->armed = false; srq_event = true; } } @@ -485,8 +486,8 @@ int siw_proc_send(struct siw_qp *qp) mem_p = *mem; if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(sge->laddr + frx->sge_off), - sge_bytes); + (void *)(uintptr_t)(sge->laddr + frx->sge_off), + sge_bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + frx->sge_off, sge_bytes); @@ -598,8 +599,8 @@ int siw_proc_write(struct siw_qp *qp) if (mem->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); + (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), + bytes); else if (!mem->is_pbl) rv = siw_rx_umem(srx, mem->umem, srx->ddp_to + srx->fpdu_part_rcvd, bytes); @@ -841,8 +842,9 @@ int siw_proc_rresp(struct siw_qp *qp) bytes = min(srx->fpdu_part_rem, srx->skb_new); if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed), - bytes); + rv = siw_rx_kva(srx, + (void *)(uintptr_t)(sge->laddr + wqe->processed), + bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, bytes); @@ -1262,7 +1264,7 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) if (wc_status == SIW_WC_SUCCESS) wc_status = SIW_WC_GENERAL_ERR; - } else if (qp->kernel_verbs && + } else if (rdma_is_kernel_res(&qp->base_qp.res) && rx_type(wqe) == SIW_OP_READ_LOCAL_INV) { /* * Handle any STag invalidation request diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 43020d2040fc..ae92c8080967 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -26,7 +26,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) { struct siw_pbl *pbl = mem->pbl; u64 offset = addr - mem->va; - u64 paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); + dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) return virt_to_page(paddr); @@ -37,7 +37,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) /* * Copy short payload at provided destination payload address */ -static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) +static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) { struct siw_wqe *wqe = &c_tx->wqe_active; struct siw_sge *sge = &wqe->sqe.sge[0]; @@ -50,16 +50,16 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) return 0; if (tx_flags(wqe) & SIW_WQE_INLINE) { - memcpy((void *)paddr, &wqe->sqe.sge[1], bytes); + memcpy(paddr, &wqe->sqe.sge[1], bytes); } else { struct siw_mem *mem = wqe->mem[0]; if (!mem->mem_obj) { /* Kernel client using kva */ - memcpy((void *)paddr, (void *)sge->laddr, bytes); + memcpy(paddr, + (const void *)(uintptr_t)sge->laddr, bytes); } else if (c_tx->in_syscall) { - if (copy_from_user((void *)paddr, - (const void __user *)sge->laddr, + if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), bytes)) return -EFAULT; } else { @@ -76,16 +76,15 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) if (unlikely(!p)) return -EFAULT; - buffer = kmap_atomic(p); + buffer = kmap(p); if (likely(PAGE_SIZE - off >= bytes)) { - memcpy((void *)paddr, buffer + off, bytes); - kunmap_atomic(buffer); + memcpy(paddr, buffer + off, bytes); } else { unsigned long part = bytes - (PAGE_SIZE - off); - memcpy((void *)paddr, buffer + off, part); - kunmap_atomic(buffer); + memcpy(paddr, buffer + off, part); + kunmap(p); if (!mem->is_pbl) p = siw_get_upage(mem->umem, @@ -97,11 +96,10 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) if (unlikely(!p)) return -EFAULT; - buffer = kmap_atomic(p); - memcpy((void *)(paddr + part), buffer, - bytes - part); - kunmap_atomic(buffer); + buffer = kmap(p); + memcpy(paddr + part, buffer, bytes - part); } + kunmap(p); } } return (int)bytes; @@ -166,7 +164,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_SEND_REMOTE_INV: @@ -189,7 +187,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send_inv); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_WRITE: @@ -201,7 +199,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_write); crc = (char *)&c_tx->pkt.write_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_READ_RESPONSE: @@ -216,7 +214,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp); crc = (char *)&c_tx->pkt.write_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; default: @@ -398,15 +396,13 @@ static int siw_0copy_tx(struct socket *s, struct page **page, #define MAX_TRAILER (MPA_CRC_SIZE + 4) -static void siw_unmap_pages(struct page **pages, int hdr_len, int num_maps) +static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask) { - if (hdr_len) { - ++pages; - --num_maps; - } - while (num_maps-- > 0) { - kunmap(*pages); - pages++; + while (kmap_mask) { + if (kmap_mask & BIT(0)) + kunmap(*pp); + pp++; + kmap_mask >>= 1; } } @@ -437,6 +433,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0, sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx, pbl_idx = c_tx->pbl_idx; + unsigned long kmap_mask = 0L; if (c_tx->state == SIW_SEND_HDR) { if (c_tx->use_sendpage) { @@ -463,8 +460,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!(tx_flags(wqe) & SIW_WQE_INLINE)) { mem = wqe->mem[sge_idx]; - if (!mem->mem_obj) - is_kva = 1; + is_kva = mem->mem_obj == NULL ? 1 : 0; } else { is_kva = 1; } @@ -473,7 +469,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * tx from kernel virtual address: either inline data * or memory region with assigned kernel buffer */ - iov[seg].iov_base = (void *)(sge->laddr + sge_off); + iov[seg].iov_base = + (void *)(uintptr_t)(sge->laddr + sge_off); iov[seg].iov_len = sge_len; if (do_crc) @@ -500,12 +497,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) p = siw_get_upage(mem->umem, sge->laddr + sge_off); if (unlikely(!p)) { - if (hdr_len) - seg--; - if (!c_tx->use_sendpage && seg) { - siw_unmap_pages(page_array, - hdr_len, seg); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EFAULT; goto done_crc; @@ -515,24 +507,29 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!c_tx->use_sendpage) { iov[seg].iov_base = kmap(p) + fp_off; iov[seg].iov_len = plen; + + /* Remember for later kunmap() */ + kmap_mask |= BIT(seg); + if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, iov[seg].iov_base, plen); - } else if (do_crc) - crypto_shash_update( - c_tx->mpa_crc_hd, - page_address(p) + fp_off, - plen); + } else if (do_crc) { + crypto_shash_update(c_tx->mpa_crc_hd, + kmap(p) + fp_off, + plen); + kunmap(p); + } } else { - u64 pa = ((sge->laddr + sge_off) & PAGE_MASK); + u64 va = sge->laddr + sge_off; - page_array[seg] = virt_to_page(pa); + page_array[seg] = virt_to_page(va & PAGE_MASK); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)(sge->laddr + sge_off), + (void *)(uintptr_t)va, plen); } @@ -543,10 +540,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (++seg > (int)MAX_ARRAY) { siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); - if (!is_kva && !c_tx->use_sendpage) { - siw_unmap_pages(page_array, hdr_len, - seg - 1); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EMSGSIZE; goto done_crc; @@ -597,8 +591,7 @@ sge_done: } else { rv = kernel_sendmsg(s, &msg, iov, seg + 1, hdr_len + data_len + trl_len); - if (!is_kva) - siw_unmap_pages(page_array, hdr_len, seg); + siw_unmap_pages(page_array, kmap_mask); } if (rv < (int)hdr_len) { /* Not even complete hdr pushed or negative rv */ @@ -824,12 +817,13 @@ static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) } } else { wqe->bytes = wqe->sqe.sge[0].length; - if (!qp->kernel_verbs) { + if (!rdma_is_kernel_res(&qp->base_qp.res)) { if (wqe->bytes > SIW_MAX_INLINE) { rv = -EINVAL; goto tx_error; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = + (u64)(uintptr_t)&wqe->sqe.sge[1]; } } wqe->wr_status = SIW_WR_INPROGRESS; @@ -924,7 +918,7 @@ tx_error: static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) { - struct ib_mr *base_mr = (struct ib_mr *)sqe->base_mr; + struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr; struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); int rv = 0; @@ -954,8 +948,7 @@ static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) mem->stag = sqe->rkey; mem->perms = sqe->access; - siw_dbg_mem(mem, "STag now valid, MR va: 0x%016llx -> 0x%016llx\n", - mem->va, base_mr->iova); + siw_dbg_mem(mem, "STag 0x%08x now valid\n", sqe->rkey); mem->va = base_mr->iova; mem->stag_valid = 1; out: diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index e7f3a2379d9d..07e30138aaa1 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -34,44 +34,19 @@ static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { [IB_QPS_ERR] = "ERR" }; -static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size) +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct siw_uobj *uobj; - struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY); - u32 key; + struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry); - uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return SIW_INVAL_UOBJ_KEY; - - if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey, - GFP_KERNEL) < 0) { - kfree(uobj); - return SIW_INVAL_UOBJ_KEY; - } - uobj->size = PAGE_ALIGN(size); - uobj->addr = vaddr; - - return key; -} - -static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx, - unsigned long off, u32 size) -{ - struct siw_uobj *uobj = xa_load(&uctx->xa, off); - - if (uobj && uobj->size == size) - return uobj; - - return NULL; + kfree(entry); } int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { struct siw_ucontext *uctx = to_siw_ctx(ctx); - struct siw_uobj *uobj; - unsigned long off = vma->vm_pgoff; - int size = vma->vm_end - vma->vm_start; + size_t size = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct siw_user_mmap_entry *entry; int rv = -EINVAL; /* @@ -79,18 +54,25 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) */ if (vma->vm_start & (PAGE_SIZE - 1)) { pr_warn("siw: mmap not page aligned\n"); - goto out; + return -EINVAL; + } + rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma); + if (!rdma_entry) { + siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n", + vma->vm_pgoff, size); + return -EINVAL; } - uobj = siw_get_uobj(uctx, off, size); - if (!uobj) { - siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n", - off, size); + entry = to_siw_mmap_entry(rdma_entry); + + rv = remap_vmalloc_range(vma, entry->address, 0); + if (rv) { + pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, + size); goto out; } - rv = remap_vmalloc_range(vma, uobj->addr, 0); - if (rv) - pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size); out: + rdma_user_mmap_entry_put(rdma_entry); + return rv; } @@ -105,8 +87,6 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) rv = -ENOMEM; goto err_out; } - xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC); - ctx->uobj_nextkey = 0; ctx->sdev = sdev; uresp.dev_id = sdev->vendor_part_id; @@ -135,19 +115,7 @@ err_out: void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) { struct siw_ucontext *uctx = to_siw_ctx(base_ctx); - void *entry; - unsigned long index; - /* - * Make sure all user mmap objects are gone. Since QP, CQ - * and SRQ destroy routines destroy related objects, nothing - * should be found here. - */ - xa_for_each(&uctx->xa, index, entry) { - kfree(xa_erase(&uctx->xa, index)); - pr_warn("siw: dropping orphaned uobj at %lu\n", index); - } - xa_destroy(&uctx->xa); atomic_dec(&uctx->sdev->num_ctx); } @@ -206,7 +174,8 @@ int siw_query_port(struct ib_device *base_dev, u8 port, attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3; + attr->phys_state = sdev->state == IB_PORT_ACTIVE ? + IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->state = sdev->state; @@ -292,6 +261,33 @@ void siw_qp_put_ref(struct ib_qp *base_qp) siw_qp_put(to_siw_qp(base_qp)); } +static struct rdma_user_mmap_entry * +siw_mmap_entry_insert(struct siw_ucontext *uctx, + void *address, size_t length, + u64 *offset) +{ + struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int rv; + + *offset = SIW_INVAL_UOBJ_KEY; + if (!entry) + return NULL; + + entry->address = address; + + rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext, + &entry->rdma_entry, + length); + if (rv) { + kfree(entry); + return NULL; + } + + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + /* * siw_create_qp() * @@ -307,7 +303,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct ib_udata *udata) { struct siw_qp *qp = NULL; - struct siw_base_qp *siw_base_qp = NULL; struct ib_device *base_dev = pd->device; struct siw_device *sdev = to_siw_dev(base_dev); struct siw_ucontext *uctx = @@ -316,6 +311,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; + size_t length; siw_dbg(base_dev, "create new QP\n"); @@ -360,28 +356,16 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, rv = -EINVAL; goto err_out; } - siw_base_qp = kzalloc(sizeof(*siw_base_qp), GFP_KERNEL); - if (!siw_base_qp) { - rv = -ENOMEM; - goto err_out; - } qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) { rv = -ENOMEM; goto err_out; } - siw_base_qp->qp = qp; - qp->ib_qp = &siw_base_qp->base_qp; - init_rwsem(&qp->state_lock); spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); spin_lock_init(&qp->orq_lock); - qp->kernel_verbs = !udata; - qp->xa_sq_index = SIW_INVAL_UOBJ_KEY; - qp->xa_rq_index = SIW_INVAL_UOBJ_KEY; - rv = siw_qp_add(sdev, qp); if (rv) goto err_out; @@ -394,10 +378,10 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (qp->kernel_verbs) - qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); - else + if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); + else + qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); @@ -424,14 +408,14 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, */ qp->srq = to_siw_srq(attrs->srq); qp->attrs.rq_size = 0; - siw_dbg(base_dev, "QP [%u]: [SRQ 0x%p] attached\n", - qp->qp_num, qp->srq); + siw_dbg(base_dev, "QP [%u]: SRQ attached\n", + qp->base_qp.qp_num); } else if (num_rqe) { - if (qp->kernel_verbs) - qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); - else + if (udata) qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); + else + qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); @@ -458,22 +442,27 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, uresp.qp_id = qp_id(qp); if (qp->sendq) { - qp->xa_sq_index = - siw_create_uobj(uctx, qp->sendq, - num_sqe * sizeof(struct siw_sqe)); + length = num_sqe * sizeof(struct siw_sqe); + qp->sq_entry = + siw_mmap_entry_insert(uctx, qp->sendq, + length, &uresp.sq_key); + if (!qp->sq_entry) { + rv = -ENOMEM; + goto err_out_xa; + } } + if (qp->recvq) { - qp->xa_rq_index = - siw_create_uobj(uctx, qp->recvq, - num_rqe * sizeof(struct siw_rqe)); - } - if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY || - qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) { - rv = -ENOMEM; - goto err_out_xa; + length = num_rqe * sizeof(struct siw_rqe); + qp->rq_entry = + siw_mmap_entry_insert(uctx, qp->recvq, + length, &uresp.rq_key); + if (!qp->rq_entry) { + uresp.sq_key = SIW_INVAL_UOBJ_KEY; + rv = -ENOMEM; + goto err_out_xa; + } } - uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT; - uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; @@ -493,19 +482,16 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); - return qp->ib_qp; + return &qp->base_qp; err_out_xa: xa_erase(&sdev->qp_xa, qp_id(qp)); err_out: - kfree(siw_base_qp); - if (qp) { - if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); - + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } vfree(qp->sendq); vfree(qp->recvq); kfree(qp); @@ -604,13 +590,12 @@ out: int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); - struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); struct siw_qp_attrs qp_attrs; - siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep); + siw_dbg_qp(qp, "state %d\n", qp->attrs.state); /* * Mark QP as in process of destruction to prevent from @@ -619,10 +604,10 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->attrs.flags |= SIW_QP_IN_DESTROY; qp->rx_stream.rx_suspend = 1; - if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } down_write(&qp->state_lock); @@ -641,7 +626,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); - kfree(siw_base_qp); return 0; } @@ -662,7 +646,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, void *kbuf = &sqe->sge[1]; int num_sge = core_wr->num_sge, bytes = 0; - sqe->sge[0].laddr = (u64)kbuf; + sqe->sge[0].laddr = (uintptr_t)kbuf; sqe->sge[0].lkey = 0; while (num_sge--) { @@ -687,6 +671,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, return bytes; } +/* Complete SQ WR's without processing */ +static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct siw_sqe sqe = {}; + int rv = 0; + + while (wr) { + sqe.id = wr->wr_id; + sqe.opcode = wr->opcode; + rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + +/* Complete RQ WR's without processing */ +static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_rqe rqe = {}; + int rv = 0; + + while (wr) { + rqe.id = wr->wr_id; + rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + /* * siw_post_send() * @@ -705,26 +730,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, unsigned long flags; int rv = 0; + if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) { + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); - return -ENOTCONN; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_sq() call with + * a concurrent request to set the QP state + * to ERROR. + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. SQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_sq(). + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); - return -ENOTCONN; - } - if (wr && !qp->kernel_verbs) { - siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } spin_lock_irqsave(&qp->sq_lock, flags); @@ -825,7 +878,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, break; case IB_WR_REG_MR: - sqe->base_mr = (uint64_t)reg_wr(wr)->mr; + sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; sqe->rkey = reg_wr(wr)->key; sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; sqe->opcode = SIW_OP_REG_MR; @@ -842,8 +895,9 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, rv = -EINVAL; break; } - siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", - sqe->opcode, sqe->flags, (void *)sqe->id); + siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n", + sqe->opcode, sqe->flags, + (void *)(uintptr_t)sqe->id); if (unlikely(rv < 0)) break; @@ -873,7 +927,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, if (rv <= 0) goto skip_direct_sending; - if (qp->kernel_verbs) { + if (rdma_is_kernel_res(&qp->base_qp.res)) { rv = siw_sq_start(qp); } else { qp->tx_ctx.in_syscall = 1; @@ -918,24 +972,54 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, *bad_wr = wr; return -EOPNOTSUPP; /* what else from errno.h? */ } + if (!rdma_is_kernel_res(&qp->base_qp.res)) { + siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - return -ENOTCONN; - } - if (!qp->kernel_verbs) { - siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_rq() call with + * a concurrent request to set the QP state + * to ERROR. + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (qp->attrs.state > SIW_QP_STATE_RTS) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. RQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_rq(). + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } /* * Serialize potentially multiple producers. @@ -992,8 +1076,8 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) siw_cq_flush(cq); - if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); atomic_dec(&sdev->num_cq); @@ -1030,16 +1114,14 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, size = roundup_pow_of_two(size); cq->base_cq.cqe = size; cq->num_cqe = size; - cq->xa_cq_index = SIW_INVAL_UOBJ_KEY; - if (!udata) { - cq->kernel_verbs = 1; - cq->queue = vzalloc(size * sizeof(struct siw_cqe) + - sizeof(struct siw_cq_ctrl)); - } else { + if (udata) cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl)); - } + else + cq->queue = vzalloc(size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl)); + if (cq->queue == NULL) { rv = -ENOMEM; goto err_out; @@ -1056,16 +1138,17 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); + size_t length = size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl); - cq->xa_cq_index = - siw_create_uobj(ctx, cq->queue, - size * sizeof(struct siw_cqe) + - sizeof(struct siw_cq_ctrl)); - if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) { + cq->cq_entry = + siw_mmap_entry_insert(ctx, cq->queue, + length, &uresp.cq_key); + if (!cq->cq_entry) { rv = -ENOMEM; goto err_out; } - uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT; + uresp.cq_id = cq->id; uresp.num_cqe = size; @@ -1086,8 +1169,8 @@ err_out: struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); vfree(cq->queue); } atomic_dec(&sdev->num_cq); @@ -1205,8 +1288,8 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; - siw_dbg_pd(pd, "start: 0x%016llx, va: 0x%016llx, len: %llu\n", - (unsigned long long)start, (unsigned long long)rnic_va, + siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", + (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { @@ -1363,7 +1446,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, struct siw_mem *mem = mr->mem; struct siw_pbl *pbl = mem->pbl; struct siw_pble *pble; - u64 pbl_size; + unsigned long pbl_size; int i, rv; if (!pbl) { @@ -1402,16 +1485,18 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, pbl_size += sg_dma_len(slp); } siw_dbg_mem(mem, - "sge[%d], size %llu, addr 0x%016llx, total %llu\n", - i, pble->size, pble->addr, pbl_size); + "sge[%d], size %u, addr 0x%p, total %lu\n", + i, pble->size, (void *)(uintptr_t)pble->addr, + pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); if (rv > 0) { mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, - "%llu bytes, start 0x%016llx, %u SLE to %u entries\n", - mem->len, mem->va, num_sle, pbl->num_buf); + "%llu bytes, start 0x%pK, %u SLE to %u entries\n", + mem->len, (void *)(uintptr_t)mem->va, num_sle, + pbl->num_buf); } return rv; } @@ -1489,12 +1574,11 @@ int siw_create_srq(struct ib_srq *base_srq, } srq->max_sge = attrs->max_sge; srq->num_rqe = roundup_pow_of_two(attrs->max_wr); - srq->xa_srq_index = SIW_INVAL_UOBJ_KEY; srq->limit = attrs->srq_limit; if (srq->limit) - srq->armed = 1; + srq->armed = true; - srq->kernel_verbs = !udata; + srq->is_kernel_res = !udata; if (udata) srq->recvq = @@ -1508,15 +1592,16 @@ int siw_create_srq(struct ib_srq *base_srq, } if (udata) { struct siw_uresp_create_srq uresp = {}; + size_t length = srq->num_rqe * sizeof(struct siw_rqe); - srq->xa_srq_index = siw_create_uobj( - ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); - - if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) { + srq->srq_entry = + siw_mmap_entry_insert(ctx, srq->recvq, + length, &uresp.srq_key); + if (!srq->srq_entry) { rv = -ENOMEM; goto err_out; } - uresp.srq_key = srq->xa_srq_index; + uresp.num_rqe = srq->num_rqe; if (udata->outlen < sizeof(uresp)) { @@ -1529,14 +1614,14 @@ int siw_create_srq(struct ib_srq *base_srq, } spin_lock_init(&srq->lock); - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: success\n", srq); + siw_dbg_pd(base_srq->pd, "[SRQ]: success\n"); return 0; err_out: if (srq->recvq) { - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); } atomic_dec(&sdev->num_srq); @@ -1573,9 +1658,9 @@ int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs, rv = -EINVAL; goto out; } - srq->armed = 1; + srq->armed = true; } else { - srq->armed = 0; + srq->armed = false; } srq->limit = attrs->srq_limit; } @@ -1622,9 +1707,8 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); - + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); } @@ -1648,10 +1732,9 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (unlikely(!srq->kernel_verbs)) { + if (unlikely(!srq->is_kernel_res)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: no kernel post_recv for mapped srq\n", - srq); + "[SRQ]: no kernel post_recv for mapped srq\n"); rv = -EINVAL; goto out; } @@ -1673,8 +1756,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, } if (unlikely(wr->num_sge > srq->max_sge)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: too many sge's: %d\n", srq, - wr->num_sge); + "[SRQ]: too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } @@ -1693,7 +1775,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&srq->lock, flags); out: if (unlikely(rv < 0)) { - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: error %d\n", srq, rv); + siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv); *bad_wr = wr; } return rv; @@ -1702,7 +1784,7 @@ out: void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) { struct ib_event event; - struct ib_qp *base_qp = qp->ib_qp; + struct ib_qp *base_qp = &qp->base_qp; /* * Do not report asynchronous errors on QP which gets diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1910869281cb..1a731989fad6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -83,6 +83,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry); void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); void siw_cq_event(struct siw_cq *cq, enum ib_event_type type); void siw_srq_event(struct siw_srq *srq, enum ib_event_type type); |