From 907610bfdf1a7f3d173a9593fde70f67d63476d1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 3 Nov 2016 16:44:23 +0200 Subject: IB/rxe: Remove and fix debug prints after allocation failure The prints after [k|v][m|z|c]alloc() functions are not needed, because in case of failure, allocator will print their internal error prints anyway. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_pool.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6bac0717c540..d723947a8542 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -180,7 +180,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) size = BITS_TO_LONGS(max - min + 1) * sizeof(long); pool->table = kmalloc(size, GFP_KERNEL); if (!pool->table) { - pr_warn("no memory for bit table\n"); err = -ENOMEM; goto out; } -- cgit v1.2.1 From a0fa72683e78979ef1123d679b1c40ae28bd9096 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Sep 2016 13:57:26 +0200 Subject: IB/rxe: avoid putting a large struct rxe_qp on stack A race condition fix added an rxe_qp structure to the stack in order to be able to perform rollback in rxe_requester(), but the structure is large enough to trigger the warning for possible stack overflow: drivers/infiniband/sw/rxe/rxe_req.c: In function 'rxe_requester': drivers/infiniband/sw/rxe/rxe_req.c:757:1: error: the frame size of 2064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] This changes the rollback function to only save the psn inside the qp, which is the only field we access in the rollback_qp anyway. Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") Signed-off-by: Arnd Bergmann Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 832846b73ea0..205222909e53 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -718,7 +718,7 @@ next_wqe: * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -726,7 +726,7 @@ next_wqe: qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- cgit v1.2.1 From 4ac4707102d9ea1ffc9b5735891f6c5ee3d236e5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 16:19:33 +0000 Subject: IB/rxe: Use DEFINE_SPINLOCK() for spinlock spinlock can be initialized automatically with DEFINE_SPINLOCK() rather than explicitly calling spin_lock_init(). Signed-off-by: Wei Yongjun Reviewed-by: Leon Romanosky Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index b8258e4f0aea..4cb63780fa74 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -46,7 +46,7 @@ #include "rxe_loc.h" static LIST_HEAD(rxe_dev_list); -static spinlock_t dev_list_lock; /* spinlock for device list */ +static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */ struct rxe_dev *net_to_rxe(struct net_device *ndev) { @@ -663,8 +663,6 @@ struct notifier_block rxe_net_notifier = { int rxe_net_ipv4_init(void) { - spin_lock_init(&dev_list_lock); - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { @@ -680,8 +678,6 @@ int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { -- cgit v1.2.1 From 95db9d05b717b1ec242799103c09acda8d728974 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Nov 2016 14:00:05 +0300 Subject: IB/rxe: Remove unneeded cast in rxe_srq_from_attr() It makes me nervous when we cast pointer parameters. I would estimate that around 50% of the time, it indicates a bug. Here the cast is not needed becaue u32 and and unsigned int are the same thing. Removing the cast makes the code more robust and future proof in case any of the types change. Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Acked-by: Moni Shoua Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 2a6e3cd2d4e8..efc832a2d7c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -169,7 +169,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, } } - err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + err = rxe_queue_resize(q, &attr->max_wr, rcv_wqe_size(srq->rq.max_sge), srq->rq.queue->ip ? srq->rq.queue->ip->context : -- cgit v1.2.1 From 6e9bb530ff8b07b3bbce79c937fc9dcb32da4eb9 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:17 -0500 Subject: IB/rxe: Remove buffer used for printing IP address Avoid smashing the stack when an ICRC error occurs on an IPv6 network. Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_recv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46f062842a9a..252b4d637d45 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -391,16 +391,15 @@ int rxe_rcv(struct sk_buff *skb) payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { - char saddr[sizeof(struct in6_addr)]; - if (skb->protocol == htons(ETH_P_IPV6)) - sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI6c\n", + &ipv6_hdr(skb)->saddr); else if (skb->protocol == htons(ETH_P_IP)) - sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI4\n", + &ip_hdr(skb)->saddr); else - sprintf(saddr, "unknown"); + pr_warn_ratelimited("bad ICRC from unknown\n"); - pr_warn_ratelimited("bad ICRC from %s\n", saddr); goto drop; } -- cgit v1.2.1 From dd753d87436ce7ef2a958d684b38d5acc99c9f5c Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:18 -0500 Subject: IB/rxe: Advance the consumer pointer before posting the CQE A simple userspace application might poll the CQ, find a completion, and then attempt to post a new WQE to the SQ. A spurious error can occur if the userspace application detects a full SQ in the instant before the kernel is able to advance the SQ consumer pointer. This is noticeable when using single-entry SQs with ibv_rc_pingpong if lots of kernel and userspace library debugging is enabled. Signed-off-by: Andrew Boyer Reviewed-by: Yonatan Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 6c5e29db88e3..d46c49b33b13 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -420,11 +420,12 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) (wqe->wr.send_flags & IB_SEND_SIGNALED) || (qp->req.state == QP_STATE_ERROR)) { make_send_cqe(qp, wqe, &cqe); + advance_consumer(qp->sq.queue); rxe_cq_post(qp->scq, &cqe, 0); + } else { + advance_consumer(qp->sq.queue); } - advance_consumer(qp->sq.queue); - /* * we completed something so let req run again * if it is trying to fence -- cgit v1.2.1 From 2a7a85487e5432424eef7a394ed26ef1d8f0d192 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:19 -0500 Subject: IB/rxe: Don't update the response PSN unless it's going forwards A client might post a read followed by a send. The partner receives and acknowledges both transactions, posting an RCQ entry for the send, but something goes wrong with the read ACK. When the client retries the read, the partner's responder processes the duplicate read but incorrectly resets the PSN to the value preceding the original send. When the duplicate send arrives, the responder cannot tell that it is a duplicate, so the responder generates a duplicate RCQ entry, confusing the client. Signed-off-by: Andrew Boyer Reviewed-by: Yonatan Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index dd3d88adc003..cb3fd4cb0daa 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -742,7 +742,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } else { qp->resp.res = NULL; qp->resp.opcode = -1; - qp->resp.psn = res->cur_psn; + if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) + qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; } -- cgit v1.2.1 From d38eb801aa145aedf4b97e8e0bb2e65763aa6149 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:20 -0500 Subject: IB/rxe: Unblock loopback by moving skb_out increment skb_out is decremented in rxe_skb_tx_dtor(), which is not called in the loopback() path. Move the increment to the send() path rather than rxe_xmit_packet(). Signed-off-by: Andrew Boyer Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_loc.h | 2 -- drivers/infiniband/sw/rxe/rxe_net.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 73849a5a91b3..efe4c6a35442 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -266,8 +266,6 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - atomic_inc(&qp->skb_out); - if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 4cb63780fa74..a576603304f7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -459,6 +459,8 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EAGAIN; } + if (pkt->qp) + atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); return 0; -- cgit v1.2.1 From d4fb59256ac03d84f68e36c430b58d6fc76dd651 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:21 -0500 Subject: IB/rxe: Add support for zero-byte operations The last_psn algorithm fails in the zero-byte case: it calculates first_psn = N, last_psn = N-1. This makes the operation unretryable since the res structure will fail the (first_psn <= psn <= last_psn) test in find_resource(). While here, use BTH_PSN_MASK to mask the calculated last_psn. Signed-off-by: Andrew Boyer Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mr.c | 3 +++ drivers/infiniband/sw/rxe/rxe_resp.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 1869152f1d23..d0faca294006 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -355,6 +355,9 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, size_t offset; u32 crc = crcp ? (*crcp) : 0; + if (length == 0) + return 0; + if (mem->type == RXE_MEM_TYPE_DMA) { u8 *src, *dest; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index cb3fd4cb0daa..a5e9ce34171b 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -444,6 +444,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } + /* A zero-byte op is not required to set an addr or rkey. */ + if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + (pkt->mask & RXE_RETH_MASK) && + reth_len(pkt) == 0) { + return RESPST_EXECUTE; + } + va = qp->resp.va; rkey = qp->resp.rkey; resid = qp->resp.resid; @@ -680,9 +687,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, res->read.va_org = qp->resp.va; res->first_psn = req_pkt->psn; - res->last_psn = req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1; + + if (reth_len(req_pkt)) { + res->last_psn = (req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1) & BTH_PSN_MASK; + } else { + res->last_psn = res->first_psn; + } res->cur_psn = req_pkt->psn; res->read.resid = qp->resp.resid; -- cgit v1.2.1 From accacb8f51c299965939ac56926d1c718e2691a1 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:22 -0500 Subject: IB/rxe: Add support for IB_CQ_REPORT_MISSED_EVENTS Peek at the CQ after arming it so that we can return a hint. This avoids missed completions due to a race between posting CQEs and arming the CQ. For example, CM teardown waits on MAD requests to complete with ib_cq_poll_work(). Without this fix, the last completion might be left on the CQ, hanging the kthread doing the teardown. The console backtraces look like this: [ 4199.911284] Call Trace: [ 4199.911401] [] schedule+0x35/0x80 [ 4199.911556] [] schedule_timeout+0x22f/0x2c0 [ 4199.911727] [] ? __schedule+0x368/0xa20 [ 4199.911891] [] wait_for_completion+0xb3/0x130 [ 4199.912067] [] ? wake_up_q+0x70/0x70 [ 4199.912243] [] cm_destroy_id+0x13d/0x450 [ib_cm] [ 4199.912422] [] ? printk+0x57/0x73 [ 4199.912578] [] ib_destroy_cm_id+0x10/0x20 [ib_cm] [ 4199.912759] [] rdma_destroy_id+0xac/0x340 [rdma_cm] [ 4199.912941] [] 0xffffffffc076f2cc Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..de39b0a081de 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1007,11 +1007,19 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); + unsigned long irq_flags; + int ret = 0; + spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - return 0; + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + ret = 1; + + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); + + return ret; } static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) -- cgit v1.2.1 From 5b9ea16c5488e577b26cc198ac63550b746ce3b9 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:23 -0500 Subject: IB/rxe: Fix ref leak in rxe_create_qp() The udata->inlen error path needs to clean up the ref added by rxe_alloc(). Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index de39b0a081de..071430c594b7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -564,7 +564,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, if (udata) { if (udata->inlen) { err = -EINVAL; - goto err1; + goto err2; } qp->is_user = 1; } @@ -573,12 +573,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); if (err) - goto err2; + goto err3; return &qp->ibqp; -err2: +err3: rxe_drop_index(qp); +err2: rxe_drop_ref(qp); err1: return ERR_PTR(err); -- cgit v1.2.1 From 5407f530122aa63cf304eb0874c938b3bdb8d3fb Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:24 -0500 Subject: IB/rxe: Fix ref leak in duplicate_request() A ref was added after the call to skb_clone(). Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index a5e9ce34171b..8643797fb530 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1145,6 +1145,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, pkt, skb_copy); if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + rxe_drop_ref(qp); kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; -- cgit v1.2.1 From 07bf9627d5f1c0334fc543a5435a31a3b5907944 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 5 Dec 2016 08:43:20 -0500 Subject: IB/rxe: Wait for tasklets to finish before tearing down QP The system may crash when a malformed request is received and the error is detected by the responder. NodeA: $ ibv_rc_pingpong -g 0 -d rxe0 -i 1 -n 1 -s 50000 NodeB: $ ibv_rc_pingpong -g 0 -d rxe0 -i 1 -n 1 -s 1024 The responder generates a receive error on node B since the incoming SEND is oversized. If the client tears down the QP before the responder or the completer finish running, a page fault may occur. The fix makes the destroy operation spin until the tasks complete, which appears to be original intent of the design. Signed-off-by: Andrew Boyer Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_task.c | 19 +++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_task.h | 1 + 2 files changed, 20 insertions(+) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 1e19bf828a6e..d2a14a1bdc7f 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -121,6 +121,7 @@ int rxe_init_task(void *obj, struct rxe_task *task, task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); + task->destroyed = false; tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); @@ -132,11 +133,29 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { + unsigned long flags; + bool idle; + + /* + * Mark the task, then wait for it to finish. It might be + * running in a non-tasklet (direct call) context. + */ + task->destroyed = true; + + do { + spin_lock_irqsave(&task->state_lock, flags); + idle = (task->state == TASK_STATE_START); + spin_unlock_irqrestore(&task->state_lock, flags); + } while (!idle); + tasklet_kill(&task->tasklet); } void rxe_run_task(struct rxe_task *task, int sched) { + if (task->destroyed) + return; + if (sched) tasklet_schedule(&task->tasklet); else diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index d14aa6daed05..08ff42d451c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -54,6 +54,7 @@ struct rxe_task { int (*func)(void *arg); int ret; char name[16]; + bool destroyed; }; /* -- cgit v1.2.1 From 37f69f43fb5aba4288d38ea32bbe0dfdb412c763 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 5 Dec 2016 08:43:21 -0500 Subject: IB/rxe: Hold refs when running tasklets It might be possible for all of a QP's references to be dropped while one of that QP's tasklets is running. For example, the completer might run during QP destroy. If qp->valid is false, it will drop all of the packets on the resp_pkts list, potentially removing the last reference. Then it tries to advance the SQ consumer pointer. If the SQ's buffer has already been destroyed, the system will panic. To be safe, hold a reference on the QP for the duration of each tasklet. Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++ drivers/infiniband/sw/rxe/rxe_req.c | 5 ++++- drivers/infiniband/sw/rxe/rxe_resp.c | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d46c49b33b13..cd27cbde7652 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -511,6 +511,8 @@ int rxe_completer(void *arg) struct rxe_pkt_info *pkt = NULL; enum comp_state state; + rxe_add_ref(qp); + if (!qp->valid) { while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); @@ -740,11 +742,13 @@ exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ + rxe_drop_ref(qp); return -EAGAIN; done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ + rxe_drop_ref(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 205222909e53..b246653cf713 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -596,6 +596,8 @@ int rxe_requester(void *arg) struct rxe_send_wqe rollback_wqe; u32 rollback_psn; + rxe_add_ref(qp); + next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) goto exit; @@ -750,9 +752,10 @@ complete: while (rxe_completer(qp) == 0) ; } - + rxe_drop_ref(qp); return 0; exit: + rxe_drop_ref(qp); return -EAGAIN; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 8643797fb530..7a36ec9dbc0c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1212,6 +1212,8 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; + rxe_add_ref(qp); + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; if (!qp->valid) { @@ -1400,5 +1402,6 @@ int rxe_responder(void *arg) exit: ret = -EAGAIN; done: + rxe_drop_ref(qp); return ret; } -- cgit v1.2.1 From d680ebed91e0b45c43ae03a880a0b43211096161 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:16 +0200 Subject: IB/rxe: Increase max number of completions to 32k Increase limit of max CQE from 8K to 32K to allow demanding applications to work over SoftRoCE with same configuration as most RoCEv2 HW vendors have. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, -- cgit v1.2.1 From 477864c8fcd953e5a988073ca5be18bb7fd93410 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:24 +0200 Subject: IB/core: Let create_ah return extended response to user Add struct ib_udata to the signature of create_ah callback that is implemented by IB device drivers. This allows HW drivers to return extra data to the userspace library. This patch prepares the ground for mlx5 driver to resolve destination mac address for a given GID and return it to userspace. This patch was previously submitted by Knut Omang as a part of the patch set to support Oracle's Infiniband HCA (SIF). Signed-off-by: Knut Omang Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rxe') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..187d85ccfe58 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, return err; } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) + { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); -- cgit v1.2.1