diff options
author | Dmitry Torokhov <dtor@insightbb.com> | 2007-05-01 00:24:54 -0400 |
---|---|---|
committer | Dmitry Torokhov <dtor@insightbb.com> | 2007-05-01 00:24:54 -0400 |
commit | bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775 (patch) | |
tree | 427fcf2a7287c16d4b5aa6cbf494d59579a6a8b1 /drivers/infiniband/hw | |
parent | 3d29cdff999c37b3876082278a8134a0642a02cd (diff) | |
parent | dc87c3985e9b442c60994308a96f887579addc39 (diff) | |
download | blackbird-op-linux-bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775.tar.gz blackbird-op-linux-bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775.zip |
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
drivers/usb/input/Makefile
drivers/usb/input/gtco.c
Diffstat (limited to 'drivers/infiniband/hw')
61 files changed, 1879 insertions, 1170 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 59243d9aedd6..58bc272bd407 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) } /* Setup the skb for reuse since we're dropping this pkt */ - elem->skb->tail = elem->skb->data = elem->skb->head; + elem->skb->data = elem->skb->head; + skb_reset_tail_pointer(elem->skb); /* Zero out the rxp hdr in the sk_buff */ memset(elem->skb->data, 0, sizeof(*rxp_hdr)); @@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev) * "sizeof(struct c2_rxp_hdr)". */ skb->data += sizeof(*rxp_hdr); - skb->tail = skb->data + buflen; + skb_set_tail_pointer(skb, buflen); skb->len = buflen; - skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); netif_rx(skb); diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index fef972752912..607c09bf764c 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev) memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); dev->ibdev.phys_port_cnt = 1; dev->ibdev.dma_device = &dev->pcidev->dev; - dev->ibdev.class_dev.dev = &dev->pcidev->dev; dev->ibdev.query_device = c2_query_device; dev->ibdev.query_port = c2_query_port; dev->ibdev.modify_port = c2_modify_port; diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 0e110f32f128..36b98989b15e 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -8,5 +8,4 @@ iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ ifdef CONFIG_INFINIBAND_CXGB3_DEBUG EXTRA_CFLAGS += -DDEBUG -iw_cxgb3-y += cxio_dbg.o endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c index 5a7306f5efae..75f7b16a271d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c +++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 82fa72041989..f5e9aeec6f6e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,6 +36,7 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/pci.h> +#include <linux/dma-mapping.h> #include "cxio_resource.h" #include "cxio_hal.h" @@ -46,7 +46,7 @@ static LIST_HEAD(rdev_list); static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; -static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) +static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) { struct cxio_rdev *rdev; @@ -56,8 +56,7 @@ static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) return NULL; } -static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev - *tdev) +static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) { struct cxio_rdev *rdev; @@ -119,7 +118,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, return 0; } -static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) +static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) { struct rdma_cq_setup setup; setup.id = cqid; @@ -131,7 +130,7 @@ static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); } -int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) +static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) { u64 sge_cmd; struct t3_modify_qp_wr *wqe; @@ -426,7 +425,7 @@ void cxio_flush_hw_cq(struct t3_cq *cq) } } -static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) +static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) { if (CQE_OPCODE(*cqe) == T3_TERMINATE) return 0; @@ -499,9 +498,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) u64 sge_cmd, ctx0, ctx1; u64 base_addr; struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - + struct sk_buff *skb; + skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); if (!skb) { PDBG("%s alloc_skb failed\n", __FUNCTION__); return -ENOMEM; @@ -509,7 +508,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) err = cxio_hal_init_ctrl_cq(rdev_p); if (err) { PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err); - return err; + goto err; } rdev_p->ctrl_qp.workq = dma_alloc_coherent( &(rdev_p->rnic_info.pdev->dev), @@ -519,7 +518,8 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) GFP_KERNEL); if (!rdev_p->ctrl_qp.workq) { PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__); - return -ENOMEM; + err = -ENOMEM; + goto err; } pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping, rdev_p->ctrl_qp.dma_addr); @@ -557,6 +557,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); skb->priority = CPL_PRIORITY_CONTROL; return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); +err: + kfree_skb(skb); + return err; } static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) @@ -761,17 +764,6 @@ ret: return err; } -/* IN : stag key, pdid, pbl_size - * Out: stag index, actaul pbl_size, and pbl_addr allocated. - */ -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr) -{ - *stag = T3_STAG_UNSET; - return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr)); -} - int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, u32 *pbl_size, @@ -1030,7 +1022,7 @@ void __exit cxio_hal_exit(void) cxio_hal_destroy_rhdl_resource(); } -static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) +static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) { struct t3_swsq *sqp; __u32 ptr = wq->sq_rptr; @@ -1059,9 +1051,8 @@ static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) break; } -static inline void create_read_req_cqe(struct t3_wq *wq, - struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) +static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, + struct t3_cqe *read_cqe) { read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; read_cqe->len = wq->oldest_read->read_len; @@ -1074,7 +1065,7 @@ static inline void create_read_req_cqe(struct t3_wq *wq, /* * Return a ptr to the next read wr in the SWSQ or NULL. */ -static inline void advance_oldest_read(struct t3_wq *wq) +static void advance_oldest_read(struct t3_wq *wq) { u32 rptr = wq->oldest_read - wq->sq + 1; diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 1b97e80b8780..99543d634704 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -144,7 +143,6 @@ int cxio_rdev_open(struct cxio_rdev *rdev); void cxio_rdev_close(struct cxio_rdev *rdev); int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, enum t3_cq_opcode op, u32 credit); -int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid); int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq); int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); @@ -155,8 +153,6 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, struct cxio_ucontext *uctx); int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr); int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, u32 *pbl_size, @@ -172,8 +168,6 @@ int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_rhdl(void); -void cxio_hal_put_rhdl(u32 rhdl); u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); int __init cxio_hal_init(void); diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 997aa32cbf07..d3095ae5bc2e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -180,7 +179,7 @@ tpt_err: /* * returns 0 if no resource available */ -static inline u32 cxio_hal_get_resource(struct kfifo *fifo) +static u32 cxio_hal_get_resource(struct kfifo *fifo) { u32 entry; if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32))) @@ -189,21 +188,11 @@ static inline u32 cxio_hal_get_resource(struct kfifo *fifo) return 0; /* fifo emptry */ } -static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) +static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) { BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0); } -u32 cxio_hal_get_rhdl(void) -{ - return cxio_hal_get_resource(rhdl_fifo); -} - -void cxio_hal_put_rhdl(u32 rhdl) -{ - cxio_hal_put_resource(rhdl_fifo, rhdl); -} - u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { return cxio_hal_get_resource(rscp->tpt_fifo); diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h index a6bbe8370d81..a2703a3d882d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 103fc42d6976..90d7b8972cb4 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 4611afa52220..0315c9d9fce9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 6517ef85026f..caf4e6007a44 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index a522b1baa3b4..3b4b0acd707f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -210,8 +209,7 @@ static enum iwch_ep_state state_read(struct iwch_ep_common *epc) return state; } -static inline void __state_set(struct iwch_ep_common *epc, - enum iwch_ep_state new) +static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) { epc->state = new; } @@ -307,8 +305,7 @@ static int status2errno(int status) */ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) { - if (skb) { - BUG_ON(skb_cloned(skb)); + if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { skb_trim(skb, 0); skb_get(skb); } else { @@ -480,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) BUG_ON(skb_cloned(skb)); mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb->end) { + if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { kfree_skb(skb); skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); if (!skb) { @@ -510,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -562,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) skb_get(skb); skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); @@ -613,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -824,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* * copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -943,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) /* * Copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -1417,6 +1416,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) wake_up(&ep->com.waitq); break; case FPDU_MODE: + start_ep_timer(ep); __state_set(&ep->com, CLOSING); attrs.next_state = IWCH_QP_STATE_CLOSING; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, @@ -1427,7 +1427,6 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) disconnect = 0; break; case CLOSING: - start_ep_timer(ep); __state_set(&ep->com, MORIBUND); disconnect = 0; break; @@ -1460,7 +1459,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) /* * Returns whether an ABORT_REQ_RSS message is a negative advice. */ -static inline int is_neg_adv_abort(unsigned int status) +static int is_neg_adv_abort(unsigned int status) { return status == CPL_ERR_RTX_NEG_ADVICE || status == CPL_ERR_PERSIST_NEG_ADVICE; @@ -1489,8 +1488,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) case CONNECTING: break; case MPA_REQ_WAIT: + stop_ep_timer(ep); break; case MPA_REQ_SENT: + stop_ep_timer(ep); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: @@ -1509,9 +1510,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) get_ep(&ep->com); break; case MORIBUND: + case CLOSING: stop_ep_timer(ep); + /*FALLTHROUGH*/ case FPDU_MODE: - case CLOSING: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = IWCH_QP_STATE_ERROR; ret = iwch_modify_qp(ep->com.qp->rhp, @@ -1572,7 +1574,6 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) spin_lock_irqsave(&ep->com.lock, flags); switch (ep->com.state) { case CLOSING: - start_ep_timer(ep); __state_set(&ep->com, MORIBUND); break; case MORIBUND: @@ -1588,6 +1589,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) __state_set(&ep->com, DEAD); release = 1; break; + case ABORTING: + break; case DEAD: default: BUG_ON(1); @@ -1618,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len); - memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len); + skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, + skb->len); ep->com.qp->attr.terminate_msg_len = skb->len; ep->com.qp->attr.is_terminate_local = 0; return CPL_RET_BUF_DONE; @@ -1636,6 +1640,7 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n", __FUNCTION__, ep->hwtid); + stop_ep_timer(ep); attrs.next_state = IWCH_QP_STATE_ERROR; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, @@ -1660,6 +1665,7 @@ static void ep_timeout(unsigned long arg) break; case MPA_REQ_WAIT: break; + case CLOSING: case MORIBUND: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = IWCH_QP_STATE_ERROR; @@ -1688,12 +1694,11 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) return -ECONNRESET; } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - state_set(&ep->com, CLOSING); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = send_halfclose(ep, GFP_KERNEL); + err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); } return 0; } @@ -1958,11 +1963,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: + start_ep_timer(ep); ep->com.state = CLOSING; close = 1; break; case CLOSING: - start_ep_timer(ep); ep->com.state = MORIBUND; close = 1; break; @@ -2024,6 +2029,17 @@ static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return 0; } +static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " + "for tid %u\n", rpl->status, GET_TID(rpl)); + } + return CPL_RET_BUF_DONE; +} + int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); @@ -2051,6 +2067,7 @@ int __init iwch_cm_init(void) t3c_handlers[CPL_ABORT_REQ_RSS] = sched; t3c_handlers[CPL_RDMA_TERMINATE] = sched; t3c_handlers[CPL_RDMA_EC_STATUS] = sched; + t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl; /* * These are the real handlers that are called from a diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 7c810d904279..0c6f281bd4a0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 98b3bdb5de9e..d7624c170ee7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index a6efa8fe15d8..b40676662a8a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -48,12 +47,6 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct iwch_qp_attributes attrs; struct iwch_qp *qhp; - printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x " - "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - spin_lock(&rnicp->lock); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); @@ -74,6 +67,12 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, return; } + printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, + CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), + CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), + CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + atomic_inc(&qhp->refcnt); spin_unlock(&rnicp->lock); diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 2b6cd53bb3fc..a6c2c4ba29e6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 6861087d776c..af28a317016d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -332,6 +331,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) int ret = 0; struct iwch_mm_entry *mm; struct iwch_ucontext *ucontext; + u64 addr; PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff, key, len); @@ -346,10 +346,11 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) mm = remove_mmap(ucontext, key, len); if (!mm) return -EINVAL; + addr = mm->addr; kfree(mm); - if ((mm->addr >= rdev_p->rnic_info.udbell_physbase) && - (mm->addr < (rdev_p->rnic_info.udbell_physbase + + if ((addr >= rdev_p->rnic_info.udbell_physbase) && + (addr < (rdev_p->rnic_info.udbell_physbase + rdev_p->rnic_info.udbell_len))) { /* @@ -363,7 +364,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; vma->vm_flags &= ~VM_MAYREAD; ret = io_remap_pfn_range(vma, vma->vm_start, - mm->addr >> PAGE_SHIFT, + addr >> PAGE_SHIFT, len, vma->vm_page_prot); } else { @@ -371,7 +372,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) * Map WQ or CQ contig dma memory... */ ret = remap_pfn_range(vma, vma->vm_start, - mm->addr >> PAGE_SHIFT, + addr >> PAGE_SHIFT, len, vma->vm_page_prot); } @@ -464,9 +465,6 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, php = to_iwch_pd(pd); rhp = php->rhp; - acc = iwch_convert_access(acc); - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); @@ -492,12 +490,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; - /* NOTE: TPT perms are backwards from BIND WR perms! */ - mhp->attr.perms = (acc & 0x1) << 3; - mhp->attr.perms |= (acc & 0x2) << 1; - mhp->attr.perms |= (acc & 0x4) >> 1; - mhp->attr.perms |= (acc & 0x8) >> 3; - + mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = *iova_start; mhp->attr.page_size = shift - 12; @@ -526,7 +519,6 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, struct iwch_mr mh, *mhp; struct iwch_pd *php; struct iwch_dev *rhp; - int new_acc; __be64 *page_list = NULL; int shift = 0; u64 total_size; @@ -547,19 +539,20 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, if (rhp != php->rhp) return -EINVAL; - new_acc = mhp->attr.perms; - memcpy(&mh, mhp, sizeof *mhp); if (mr_rereg_mask & IB_MR_REREG_PD) php = to_iwch_pd(pd); if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mh.attr.perms = iwch_convert_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) + mh.attr.perms = iwch_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, &total_size, &npages, &shift, &page_list); + if (ret) + return ret; + } ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); kfree(page_list); @@ -569,7 +562,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, if (mr_rereg_mask & IB_MR_REREG_PD) mhp->attr.pdid = php->pdid; if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mhp->attr.perms = acc; + mhp->attr.perms = iwch_ib_to_tpt_access(acc); if (mr_rereg_mask & IB_MR_REREG_TRANS) { mhp->attr.zbva = 0; mhp->attr.va_fbo = *iova_start; @@ -614,8 +607,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, goto err; } - acc = iwch_convert_access(acc); - i = n = 0; list_for_each_entry(chunk, ®ion->chunk_list, list) @@ -631,10 +622,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; - mhp->attr.perms = (acc & 0x1) << 3; - mhp->attr.perms |= (acc & 0x2) << 1; - mhp->attr.perms |= (acc & 0x4) >> 1; - mhp->attr.perms |= (acc & 0x8) >> 3; + mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = region->virt_base; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) region->length; @@ -737,10 +725,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp) qhp = to_iwch_qp(ib_qp); rhp = qhp->rhp; - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - } + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); @@ -949,7 +935,7 @@ void iwch_qp_rem_ref(struct ib_qp *qp) wake_up(&(to_iwch_qp(qp)->wait)); } -struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) +static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) { PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn); return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); @@ -1122,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev) memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); - dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; dev->ibdev.modify_port = iwch_modify_port; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 61e3278fd7a8..93bcc56756bd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -179,7 +178,6 @@ static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) void iwch_qp_add_ref(struct ib_qp *qp); void iwch_qp_rem_ref(struct ib_qp *qp); -struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn); struct iwch_ucontext { struct ib_ucontext ibucontext; @@ -288,27 +286,20 @@ static inline int iwch_convert_state(enum ib_qp_state ib_state) } } -enum iwch_mem_perms { - IWCH_MEM_ACCESS_LOCAL_READ = 1 << 0, - IWCH_MEM_ACCESS_LOCAL_WRITE = 1 << 1, - IWCH_MEM_ACCESS_REMOTE_READ = 1 << 2, - IWCH_MEM_ACCESS_REMOTE_WRITE = 1 << 3, - IWCH_MEM_ACCESS_ATOMICS = 1 << 4, - IWCH_MEM_ACCESS_BINDING = 1 << 5, - IWCH_MEM_ACCESS_LOCAL = - (IWCH_MEM_ACCESS_LOCAL_READ | IWCH_MEM_ACCESS_LOCAL_WRITE), - IWCH_MEM_ACCESS_REMOTE = - (IWCH_MEM_ACCESS_REMOTE_WRITE | IWCH_MEM_ACCESS_REMOTE_READ) - /* cannot go beyond 1 << 31 */ -} __attribute__ ((packed)); - -static inline u32 iwch_convert_access(int acc) +static inline u32 iwch_ib_to_tpt_access(int acc) { - return (acc & IB_ACCESS_REMOTE_WRITE ? IWCH_MEM_ACCESS_REMOTE_WRITE : 0) - | (acc & IB_ACCESS_REMOTE_READ ? IWCH_MEM_ACCESS_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? IWCH_MEM_ACCESS_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? IWCH_MEM_ACCESS_BINDING : 0) | - IWCH_MEM_ACCESS_LOCAL_READ; + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + TPT_LOCAL_READ; +} + +static inline u32 iwch_ib_to_mwbind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | + T3_MEM_ACCESS_LOCAL_READ; } enum iwch_mmid_state { diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index e066727504b6..0a472c9b44db 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,8 +36,8 @@ #define NO_SUPPORT -1 -static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, - u8 * flit_cnt) +static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, + u8 * flit_cnt) { int i; u32 plen; @@ -97,8 +96,8 @@ static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -138,8 +137,8 @@ static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; @@ -159,9 +158,8 @@ static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, /* * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. */ -static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp, - struct ib_sge *sg_list, u32 num_sgle, - u32 * pbl_addr, u8 * page_size) +static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, + u32 num_sgle, u32 * pbl_addr, u8 * page_size) { int i; struct iwch_mr *mhp; @@ -207,9 +205,8 @@ static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp, return 0; } -static inline int iwch_build_rdma_recv(struct iwch_dev *rhp, - union t3_wr *wqe, - struct ib_recv_wr *wr) +static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, + struct ib_recv_wr *wr) { int i, err = 0; u32 pbl_addr[4]; @@ -442,7 +439,7 @@ int iwch_bind_mw(struct ib_qp *qp, wqe->bind.type = T3_VA_BASED_TO; /* TBD: check perms */ - wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags); + wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); wqe->bind.mw_len = cpu_to_be32(mw_bind->length); @@ -474,8 +471,7 @@ int iwch_bind_mw(struct ib_qp *qp, return err; } -static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, - int tagged) +static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged) { switch (t3err) { case TPT_ERR_STAG: @@ -673,7 +669,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) spin_lock_irqsave(&qhp->lock, *flag); } -static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) { if (t3b_device(qhp->rhp)) cxio_set_wq_in_error(&qhp->wq); @@ -685,7 +681,7 @@ static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag) /* * Return non zero if at least one RECV was pre-posted. */ -static inline int rqes_posted(struct iwch_qp *qhp) +static int rqes_posted(struct iwch_qp *qhp) { return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV; } @@ -846,6 +842,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; + if (t3b_device(qhp->rhp)) + cxio_set_wq_in_error(&qhp->wq); if (!internal) terminate = 1; break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index c4e7fbea8bbd..cb7086f558c1 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -1,6 +1,5 @@ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig index 727b10d89686..1a854598e0e6 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -7,11 +7,3 @@ config INFINIBAND_EHCA To compile the driver as a module, choose M here. The module will be called ib_ehca. -config INFINIBAND_EHCA_SCALING - bool "Scaling support (EXPERIMENTAL)" - depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL - default y - ---help--- - eHCA scaling support schedules the CQ callbacks to different CPUs. - - To enable this feature choose Y here. diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index cf95ee474b0f..10fb8fbafa0c 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -42,8 +42,6 @@ #ifndef __EHCA_CLASSES_H__ #define __EHCA_CLASSES_H__ -#include "ehca_classes.h" -#include "ipz_pt_fn.h" struct ehca_module; struct ehca_qp; @@ -54,15 +52,25 @@ struct ehca_mw; struct ehca_pd; struct ehca_av; -#ifdef CONFIG_PPC64 -#include "ehca_classes_pSeries.h" -#endif +#include <linux/wait.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" #include "ehca_irq.h" +#define EHCA_EQE_CACHE_SIZE 20 + +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; + struct ehca_eq { u32 length; struct ipz_queue ipz_queue; @@ -74,6 +82,8 @@ struct ehca_eq { spinlock_t spinlock; struct tasklet_struct interrupt_task; u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; }; struct ehca_sport { @@ -96,6 +106,7 @@ struct ehca_shca { struct ehca_mr *maxmr; struct ehca_pd *pd; struct h_galpas galpas; + struct mutex modify_mutex; }; struct ehca_pd { @@ -145,7 +156,9 @@ struct ehca_cq { spinlock_t cb_lock; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; - u32 nr_callbacks; + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + u32 nr_events; /* #events seen */ + wait_queue_head_t wait_completion; spinlock_t task_lock; u32 ownpid; /* mmap counter for resources mapped into user space */ @@ -269,6 +282,7 @@ extern struct idr ehca_cq_idr; extern int ehca_static_rate; extern int ehca_port_act_time; extern int ehca_use_hp_mr; +extern int ehca_scaling_code; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 6ebfa27e4e16..e2cdc1a16fe9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, spin_lock_init(&my_cq->spinlock); spin_lock_init(&my_cq->cb_lock); spin_lock_init(&my_cq->task_lock); + init_waitqueue_head(&my_cq->wait_completion); my_cq->ownpid = current->tgid; cq = &my_cq->ib_cq; @@ -302,6 +303,16 @@ create_cq_exit1: return cq; } +static int get_cq_nr_events(struct ehca_cq *my_cq) +{ + int ret; + unsigned long flags; + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + ret = my_cq->nr_events; + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + return ret; +} + int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; @@ -329,10 +340,11 @@ int ehca_destroy_cq(struct ib_cq *cq) } spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) { + while (my_cq->nr_events) { spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - yield(); + wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); spin_lock_irqsave(&ehca_cq_idr_lock, flags); + /* recheck nr_events to assure no cqe has just arrived */ } idr_remove(&ehca_cq_idr, my_cq->token); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 24ceab0bae4a..4961eb88827c 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca, struct ib_device *ib_dev = &shca->ib_device; spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); eq->is_initialized = 0; if (type != EHCA_EQ && type != EHCA_NEQ) { diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index b7be950ab47c..32b55a4f0e5b 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev, break; } + props->port_cap_flags = rblock->capability_mask; props->gid_tbl_len = rblock->gid_tbl_len; props->max_msg_sz = rblock->max_msg_sz; props->bad_pkey_cntr = rblock->bad_pkey_cntr; @@ -162,6 +163,9 @@ int ehca_query_port(struct ib_device *ibdev, props->active_width = IB_WIDTH_12X; props->active_speed = 0x1; + /* at the moment (logical) link state is always LINK_UP */ + props->phys_state = 0x5; + query_port1: ehca_free_fw_ctrlblock(rblock); @@ -233,10 +237,60 @@ query_gid1: return ret; } +const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, struct ib_port_modify *props) { - /* Not implemented yet */ - return -EFAULT; + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 6c4f9f91b15d..f284be1c9166 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -63,13 +63,11 @@ #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) -#ifdef CONFIG_INFINIBAND_EHCA_SCALING - static void queue_comp_task(struct ehca_cq *__cq); static struct ehca_comp_pool* pool; +#ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; - #endif static inline void comp_event_callback(struct ehca_cq *cq) @@ -206,7 +204,7 @@ static void qp_event_callback(struct ehca_shca *shca, } static void cq_event_callback(struct ehca_shca *shca, - u64 eqe) + u64 eqe) { struct ehca_cq *cq; unsigned long flags; @@ -318,7 +316,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) "disruptive port %x configuration change", port); ehca_info(&shca->ib_device, - "port %x is inactive.", port); + "port %x is inactive.", port); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port; @@ -326,7 +324,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) ib_dispatch_event(&event); ehca_info(&shca->ib_device, - "port %x is active.", port); + "port %x is active.", port); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ACTIVE; event.element.port_num = port; @@ -401,200 +399,274 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) return IRQ_HANDLED; } -void ehca_tasklet_eq(unsigned long data) + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) { - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - int int_state; - int query_cnt = 0; + u64 eqe_value; + u32 token; + unsigned long flags; + struct ehca_cq *cq; - do { - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, token); + if (cq == NULL) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + cq->nr_events++; + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq->nr_events--; + if (!cq->nr_events) + wake_up(&cq->wait_completion); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} - if ((shca->hw_level >= 2) && eqe) - int_state = 1; - else - int_state = 0; - - while ((int_state == 1) || eqe) { - while (eqe) { - u64 eqe_value = eqe->entry; - - ehca_dbg(&shca->ib_device, - "eqe_value=%lx", eqe_value); - - /* TODO: better structure */ - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, - eqe_value)) { - unsigned long flags; - u32 token; - struct ehca_cq *cq; - - ehca_dbg(&shca->ib_device, - "... completion event"); - token = - EHCA_BMASK_GET(EQE_CQ_TOKEN, - eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, - flags); - cq = idr_find(&ehca_cq_idr, token); - - if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); - break; - } - - reset_eq_pending(cq); -#ifdef CONFIG_INFINIBAND_EHCA_SCALING - queue_comp_task(cq); - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); -#else - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); - comp_event_callback(cq); -#endif - } else { - ehca_dbg(&shca->ib_device, - "... non completion event"); - parse_identifier(shca, eqe_value); - } - eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, - &shca->eq); - } +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value; + unsigned long flags; + int eqe_cnt, i; + int eq_empty = 0; + + spin_lock_irqsave(&eq->irq_spinlock, flags); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } - if (shca->hw_level >= 2) { - int_state = - hipz_h_query_int_state(shca->ipz_hca_handle, - shca->eq.ist); - query_cnt++; - iosync(); - if (query_cnt >= 100) { - query_cnt = 0; - int_state = 0; - } + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = + (struct ehca_eqe *)ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + spin_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (!eqe_cache[eqe_cnt].cq) { + spin_unlock(&ehca_cq_idr_lock); + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; } - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); - + eqe_cache[eqe_cnt].cq->nr_events++; + spin_unlock(&ehca_cq_idr_lock); + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if (ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq->nr_events--; + if (!cq->nr_events) + wake_up(&cq->wait_completion); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); } - } while (int_state != 0); + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); - return; +unlock_irq_spinlock: + spin_unlock_irqrestore(&eq->irq_spinlock, flags); } -#ifdef CONFIG_INFINIBAND_EHCA_SCALING +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} static inline int find_next_online_cpu(struct ehca_comp_pool* pool) { - unsigned long flags_last_cpu; + int cpu; + unsigned long flags; + WARN_ON_ONCE(!in_interrupt()); if (ehca_debug_level) ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); - spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); - pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (pool->last_cpu == NR_CPUS) - pool->last_cpu = first_cpu(cpu_online_map); - spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); + spin_lock_irqsave(&pool->last_cpu_lock, flags); + cpu = next_cpu(pool->last_cpu, cpu_online_map); + if (cpu == NR_CPUS) + cpu = first_cpu(cpu_online_map); + pool->last_cpu = cpu; + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); - return pool->last_cpu; + return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, struct ehca_cpu_comp_task *cct) { - unsigned long flags_cct; - unsigned long flags_cq; + unsigned long flags; - spin_lock_irqsave(&cct->task_lock, flags_cct); - spin_lock_irqsave(&__cq->task_lock, flags_cq); + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); if (__cq->nr_callbacks == 0) { __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; wake_up(&cct->wait_queue); - } - else + } else __cq->nr_callbacks++; - spin_unlock_irqrestore(&__cq->task_lock, flags_cq); - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); } static void queue_comp_task(struct ehca_cq *__cq) { - int cpu; int cpu_id; struct ehca_cpu_comp_task *cct; + int cq_jobs; + unsigned long flags; - cpu = get_cpu(); cpu_id = find_next_online_cpu(pool); - BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + BUG_ON(!cct); - if (cct->cq_jobs > 0) { + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + BUG_ON(!cct); } __queue_comp_task(__cq, cct); - - put_cpu(); - - return; } static void run_comp_task(struct ehca_cpu_comp_task* cct) { struct ehca_cq *cq; - unsigned long flags_cct; - unsigned long flags_cq; + unsigned long flags; - spin_lock_irqsave(&cct->task_lock, flags_cct); + spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + spin_unlock_irqrestore(&cct->task_lock, flags); comp_event_callback(cq); - spin_lock_irqsave(&cct->task_lock, flags_cct); - spin_lock_irqsave(&cq->task_lock, flags_cq); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq->nr_events--; + if (!cq->nr_events) + wake_up(&cq->wait_completion); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&cq->task_lock); cq->nr_callbacks--; - if (cq->nr_callbacks == 0) { + if (!cq->nr_callbacks) { list_del_init(cct->cq_list.next); cct->cq_jobs--; } - spin_unlock_irqrestore(&cq->task_lock, flags_cq); - + spin_unlock(&cq->task_lock); } - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - return; + spin_unlock_irqrestore(&cct->task_lock, flags); } static int comp_task(void *__cct) { struct ehca_cpu_comp_task* cct = __cct; + int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); while(!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); - if (list_empty(&cct->cq_list)) + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + spin_unlock_irq(&cct->task_lock); + if (cql_empty) schedule(); else __set_current_state(TASK_RUNNING); remove_wait_queue(&cct->wait_queue, &wait); - if (!list_empty(&cct->cq_list)) + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + spin_unlock_irq(&cct->task_lock); + if (!cql_empty) run_comp_task(__cct); set_current_state(TASK_INTERRUPTIBLE); @@ -637,8 +709,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool, if (task) kthread_stop(task); - - return; } static void take_over_work(struct ehca_comp_pool *pool, @@ -654,17 +724,18 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); while(!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - list_del(&cq->entry); - __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, - smp_processor_id())); + list_del(&cq->entry); + __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, + smp_processor_id())); } spin_unlock_irqrestore(&cct->task_lock, flags_cct); } +#ifdef CONFIG_HOTPLUG_CPU static int comp_pool_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -707,15 +778,16 @@ static int comp_pool_callback(struct notifier_block *nfb, return NOTIFY_OK; } - #endif int ehca_create_comp_pool(void) { -#ifdef CONFIG_INFINIBAND_EHCA_SCALING int cpu; struct task_struct *task; + if (!ehca_scaling_code) + return 0; + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); if (pool == NULL) return -ENOMEM; @@ -737,20 +809,27 @@ int ehca_create_comp_pool(void) } } +#ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; comp_pool_callback_nb.priority =0; register_cpu_notifier(&comp_pool_callback_nb); #endif + printk(KERN_INFO "eHCA scaling code enabled\n"); + return 0; } void ehca_destroy_comp_pool(void) { -#ifdef CONFIG_INFINIBAND_EHCA_SCALING int i; + if (!ehca_scaling_code) + return; + +#ifdef CONFIG_HOTPLUG_CPU unregister_cpu_notifier(&comp_pool_callback_nb); +#endif for (i = 0; i < NR_CPUS; i++) { if (cpu_online(i)) @@ -758,7 +837,4 @@ void ehca_destroy_comp_pool(void) } free_percpu(pool->cpu_comp_tasks); kfree(pool); -#endif - - return; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index be579cc0adf6..6ed06ee033ed 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data); irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { wait_queue_head_t wait_queue; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 1155bcf48212..4700085ba834 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0020"); +MODULE_VERSION("SVNEHCA_0022"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -62,6 +62,7 @@ int ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; +int ehca_scaling_code = 1; module_param_named(open_aqp1, ehca_open_aqp1, int, 0); module_param_named(debug_level, ehca_debug_level, int, 0); @@ -71,6 +72,7 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); module_param_named(port_act_time, ehca_port_act_time, int, 0); module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); module_param_named(static_rate, ehca_static_rate, int, 0); +module_param_named(scaling_code, ehca_scaling_code, int, 0); MODULE_PARM_DESC(open_aqp1, "AQP1 on startup (0: no (default), 1: yes)"); @@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs, " (0: no, 1: yes (default))"); MODULE_PARM_DESC(static_rate, "set permanent static rate (default: disabled)"); +MODULE_PARM_DESC(scaling_code, + "set scaling code (0: disabled, 1: enabled/default)"); spinlock_t ehca_qp_idr_lock; spinlock_t ehca_cq_idr_lock; @@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport) static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", - ehca_debug_level); + return snprintf(buf, PAGE_SIZE, "%d\n", + ehca_debug_level); } static ssize_t ehca_store_debug_level(struct device_driver *ddp, @@ -561,11 +565,11 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, const struct of_device_id *id) { struct ehca_shca *shca; - u64 *handle; + const u64 *handle; struct ib_pd *ibpd; int ret; - handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + handle = get_property(dev->ofdev.node, "ibm,hca-handle", NULL); if (!handle) { ehca_gen_err("Cannot get eHCA handle for adapter: %s.", dev->ofdev.node->full_name); @@ -583,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); shca->ibmebus_dev = dev; shca->ipz_hca_handle.handle = *handle; @@ -778,8 +783,24 @@ void ehca_poll_eqs(unsigned long data) spin_lock(&shca_list_lock); list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) - ehca_tasklet_eq((unsigned long)(void*)shca); + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + u64 flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } } mod_timer(&poll_eqs_timer, jiffies + HZ); spin_unlock(&shca_list_lock); @@ -790,7 +811,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0020)\n"); + "(Rel.: SVNEHCA_0022)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 3fb46e67df87..b564fcd3b282 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -70,6 +70,10 @@ #define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) #define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + /* direct access qp controls */ #define DAQP_CTRL_ENABLE 0x01 #define DAQP_CTRL_SEND_COMP 0x20 @@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, return ret; } +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 587ebd470959..2869f7dd6196 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, struct hipz_query_port *query_port_response_block); +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask); + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index dc3bda2634b7..8199c45768a3 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) if (q_offset >= queue->queue_length) return NULL; current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; - return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; } /* @@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) return ret; } +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *) ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + /* returns address (GX) of first queue entry */ static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) { diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 54139d398181..10c008f22ba6 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -78,6 +78,8 @@ #define IPATH_IB_LINKINIT 3 #define IPATH_IB_LINKDOWN_SLEEP 4 #define IPATH_IB_LINKDOWN_DISABLE 5 +#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ +#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ /* * stats maintained by the driver. For now, at least, this is global @@ -316,11 +318,17 @@ struct ipath_base_info { /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; - /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + /* shared memory pages for subports if port is shared */ __u64 spi_subport_uregbase; __u64 spi_subport_rcvegrbuf; __u64 spi_subport_rcvhdr_base; + /* shared memory page for hardware port if it is shared */ + __u64 spi_port_uregbase; + __u64 spi_port_rcvegrbuf; + __u64 spi_port_rcvhdr_base; + __u64 spi_port_rcvhdr_tailaddr; + } __attribute__ ((aligned(8))); @@ -344,7 +352,7 @@ struct ipath_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define IPATH_USER_SWMINOR 3 +#define IPATH_USER_SWMINOR 5 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -418,11 +426,14 @@ struct ipath_user_info { #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ -#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ #define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ #define IPATH_CMD_USER_INIT 24 /* set up userspace */ +#define IPATH_CMD_UNUSED_1 25 +#define IPATH_CMD_UNUSED_2 26 +#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ -#define IPATH_CMD_MAX 24 +#define IPATH_CMD_MAX 27 struct ipath_port_info { __u32 num_active; /* number of active units */ @@ -430,7 +441,7 @@ struct ipath_port_info { __u16 port; /* port on unit assigned to caller */ __u16 subport; /* subport on unit assigned to caller */ __u16 num_ports; /* number of ports available on unit */ - __u16 num_subports; /* number of subport slaves opened on port */ + __u16 num_subports; /* number of subports opened on port */ }; struct ipath_tid_info { diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 87462e0cb4d2..ea78e6dddc90 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - wc->queue[head] = *entry; + wc->queue[head].wr_id = entry->wr_id; + wc->queue[head].status = entry->status; + wc->queue[head].opcode = entry->opcode; + wc->queue[head].vendor_err = entry->vendor_err; + wc->queue[head].byte_len = entry->byte_len; + wc->queue[head].imm_data = (__u32 __force)entry->imm_data; + wc->queue[head].qp_num = entry->qp->qp_num; + wc->queue[head].src_qp = entry->src_qp; + wc->queue[head].wc_flags = entry->wc_flags; + wc->queue[head].pkey_index = entry->pkey_index; + wc->queue[head].slid = entry->slid; + wc->queue[head].sl = entry->sl; + wc->queue[head].dlid_path_bits = entry->dlid_path_bits; + wc->queue[head].port_num = entry->port_num; wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || @@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail > (u32) cq->ibcq.cqe) tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + struct ipath_qp *qp; + if (tail == wc->head) break; - *entry = wc->queue[tail]; + + qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, + wc->queue[tail].qp_num); + entry->qp = &qp->ibqp; + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + entry->wr_id = wc->queue[tail].wr_id; + entry->status = wc->queue[tail].status; + entry->opcode = wc->queue[tail].opcode; + entry->vendor_err = wc->queue[tail].vendor_err; + entry->byte_len = wc->queue[tail].byte_len; + entry->imm_data = wc->queue[tail].imm_data; + entry->src_qp = wc->queue[tail].src_qp; + entry->wc_flags = wc->queue[tail].wc_flags; + entry->pkey_index = wc->queue[tail].pkey_index; + entry->slid = wc->queue[tail].slid; + entry->sl = wc->queue[tail].sl; + entry->dlid_path_bits = wc->queue[tail].dlid_path_bits; + entry->port_num = wc->queue[tail].port_num; if (tail >= cq->ibcq.cqe) tail = 0; else diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index df69f0d80b8b..42bfbdb0d3e6 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -57,6 +57,7 @@ #define __IPATH_PROCDBG 0x100 /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x200 +#define __IPATH_ERRPKTDBG 0x400 #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 0f13a2182cc7..63e8368b0e95 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp) } fp->private_data = dd; - ipath_diag_inuse = 1; + ipath_diag_inuse = -2; diag_set_link = 0; ret = 0; @@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if (ipath_diag_inuse < 1 && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit reads */ ret = ipath_read_umem32(dd, data, kreg_base + *off, count); @@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -2) + ipath_diag_inuse++; } return ret; @@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || + ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ + ret = -EINVAL; /* before any other write allowed */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit writes */ ret = ipath_write_umem32(dd, kreg_base + *off, data, count); @@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -1) + ipath_diag_inuse = 1; /* all read/write OK now */ } return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index 6e0f2b8918ce..f87f003e3ef8 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) +static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) { u64 addr; int i; @@ -167,7 +167,7 @@ static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, } static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) + void *cpu_addr, u64 dma_handle) { free_pages((unsigned long) cpu_addr, get_order(size)); } diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ae7f21a0cdc0..e3a223209710 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { -#ifdef CONFIG_HT_IRQ case PCI_DEVICE_ID_INFINIPATH_HT: +#ifdef CONFIG_HT_IRQ ipath_init_iba6110_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if " + "CONFIG_HT_IRQ is not enabled\n", ent->device); + return -ENODEV; #endif -#ifdef CONFIG_PCI_MSI case PCI_DEVICE_ID_INFINIPATH_PE800: +#ifdef CONFIG_PCI_MSI ipath_init_iba6120_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if " + "CONFIG_PCI_MSI is not enabled\n", ent->device); + return -ENODEV; #endif default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ if (ret) - goto bail_iounmap; + goto bail_irqsetup; ret = ipath_enable_wc(dd); @@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail; +bail_irqsetup: + if (pdev->irq) free_irq(pdev->irq, dd); + bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) { int port; - ipath_shutdown_device(dd); - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; @@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); + tmpp = dd->ipath_pageshadow; dd->ipath_pageshadow = NULL; + vfree(tmpp); } /* @@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + /* + * disable the IB link early, to be sure no new packets arrive, which + * complicates the shutdown process + */ + ipath_shutdown_device(dd); + if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); @@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +/* + * Decode the error status into strings, deciding whether to always + * print * it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) { + int iserr = 1; *buf = '\0'; + if (err & INFINIPATH_E_PKTERRS) { + if (!(err & ~INFINIPATH_E_PKTERRS)) + iserr = 0; // if only packet errors. + if (ipath_debug & __IPATH_ERRPKTDBG) { + if (err & INFINIPATH_E_REBP) + strlcat(buf, "EBP ", blen); + if (err & INFINIPATH_E_RVCRC) + strlcat(buf, "VCRC ", blen); + if (err & INFINIPATH_E_RICRC) { + strlcat(buf, "CRC ", blen); + // clear for check below, so only once + err &= INFINIPATH_E_RICRC; + } + if (err & INFINIPATH_E_RSHORTPKTLEN) + strlcat(buf, "rshortpktlen ", blen); + if (err & INFINIPATH_E_SDROPPEDDATAPKT) + strlcat(buf, "sdroppeddatapkt ", blen); + if (err & INFINIPATH_E_SPKTLEN) + strlcat(buf, "spktlen ", blen); + } + if ((err & INFINIPATH_E_RICRC) && + !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } if (err & INFINIPATH_E_RHDRLEN) strlcat(buf, "rhdrlen ", blen); if (err & INFINIPATH_E_RBADTID) @@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "rhdr ", blen); if (err & INFINIPATH_E_RLONGPKTLEN) strlcat(buf, "rlongpktlen ", blen); - if (err & INFINIPATH_E_RSHORTPKTLEN) - strlcat(buf, "rshortpktlen ", blen); if (err & INFINIPATH_E_RMAXPKTLEN) strlcat(buf, "rmaxpktlen ", blen); if (err & INFINIPATH_E_RMINPKTLEN) strlcat(buf, "rminpktlen ", blen); + if (err & INFINIPATH_E_SMINPKTLEN) + strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_RFORMATERR) strlcat(buf, "rformaterr ", blen); if (err & INFINIPATH_E_RUNSUPVL) @@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "runexpchar ", blen); if (err & INFINIPATH_E_RIBFLOW) strlcat(buf, "ribflow ", blen); - if (err & INFINIPATH_E_REBP) - strlcat(buf, "EBP ", blen); if (err & INFINIPATH_E_SUNDERRUN) strlcat(buf, "sunderrun ", blen); if (err & INFINIPATH_E_SPIOARMLAUNCH) strlcat(buf, "spioarmlaunch ", blen); if (err & INFINIPATH_E_SUNEXPERRPKTNUM) strlcat(buf, "sunexperrpktnum ", blen); - if (err & INFINIPATH_E_SDROPPEDDATAPKT) - strlcat(buf, "sdroppeddatapkt ", blen); if (err & INFINIPATH_E_SDROPPEDSMPPKT) strlcat(buf, "sdroppedsmppkt ", blen); if (err & INFINIPATH_E_SMAXPKTLEN) strlcat(buf, "smaxpktlen ", blen); - if (err & INFINIPATH_E_SMINPKTLEN) - strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_SUNSUPVL) strlcat(buf, "sunsupVL ", blen); - if (err & INFINIPATH_E_SPKTLEN) - strlcat(buf, "spktlen ", blen); if (err & INFINIPATH_E_INVALIDADDR) strlcat(buf, "invalidaddr ", blen); - if (err & INFINIPATH_E_RICRC) - strlcat(buf, "CRC ", blen); - if (err & INFINIPATH_E_RVCRC) - strlcat(buf, "VCRC ", blen); if (err & INFINIPATH_E_RRCVEGRFULL) strlcat(buf, "rcvegrfull ", blen); if (err & INFINIPATH_E_RRCVHDRFULL) @@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); +done: + return iserr; } /** @@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) lstate = IPATH_LINKACTIVE; break; + case IPATH_IB_LINK_LOOPBACK: + dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + + case IPATH_IB_LINK_EXTERNAL: + dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n"); + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + default: ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); ret = -EINVAL; @@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) return 0; } -/** - * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register - * @dd: the infinipath device - * @regno: the register number to read - * @port: the port containing the register - * - * Registers that vary with the chip implementation constants (port) - * use this routine. - */ -u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno, - unsigned port) -{ - u16 where; - - if (port < dd->ipath_portcnt && - (regno == dd->ipath_kregs->kr_rcvhdraddr || - regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) - where = regno + port; - else - where = -1; - - return ipath_read_kreg64(dd, where); -} /** * ipath_write_kreg_port - write a device's per-port 64-bit kernel register @@ -1973,7 +2005,8 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); + if (ipath_debug & __IPATH_DBG) + printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index a4019a6b7560..030185f90ee2 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) } else memcpy(dd->ipath_serial, ifp->if_serial, sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + ipath_dev_err(dd, "Board SN %s did not pass functional " + "test: %s\n", dd->ipath_serial, + ifp->if_comment); ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", (unsigned long long) be64_to_cpu(dd->ipath_guid)); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 5d64ff875297..1272aaf2a785 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,12 +41,6 @@ #include "ipath_kernel.h" #include "ipath_common.h" -/* - * mmap64 doesn't allow all 64 bits for 32-bit applications - * so only use the low 43 bits. - */ -#define MMAP64_MASK 0x7FFFFFFFFFFUL - static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; +/* + * Convert kernel virtual addresses to physical addresses so they don't + * potentially conflict with the chip addresses used as mmap offsets. + * It doesn't really matter what mmap offset we use as long as we can + * interpret it correctly. + */ +static u64 cvt_kvaddr(void *p) +{ + struct page *page; + u64 paddr = 0; + + page = vmalloc_to_page(p); + if (page) + paddr = page_to_pfn(page) << PAGE_SHIFT; + + return paddr; +} + static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { @@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp, sz = sizeof(*kinfo); /* If port sharing is not requested, allow the old size structure */ if (!shared) - sz -= 3 * sizeof(u64); + sz -= 7 * sizeof(u64); if (ubase_size < sz) { ipath_cdbg(PROC, "Base size %zu, need %zu (version mismatch?)\n", @@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp, kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * (dd->ipath_pbufsport - kinfo->spi_piocnt); - kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; } else { unsigned slave = subport_fp(fp) - 1; kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * kinfo->spi_piocnt * slave; - kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + - PAGE_SIZE * slave) & MMAP64_MASK; + } + if (shared) { + kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; + kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; + kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; - kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * slave) & MMAP64_MASK; - kinfo->spi_rcvhdr_tailaddr = - (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; - kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + - dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & - MMAP64_MASK; + kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport_fp(fp)); + + kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport_fp(fp)); + kinfo->spi_rcvhdr_tailaddr = 0; + kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + + pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * + subport_fp(fp)); + + kinfo->spi_subport_uregbase = + cvt_kvaddr(pd->subport_uregbase); + kinfo->spi_subport_rcvegrbuf = + cvt_kvaddr(pd->subport_rcvegrbuf); + kinfo->spi_subport_rcvhdr_base = + cvt_kvaddr(pd->subport_rcvhdr_base); + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); } kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / @@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp, if (master) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; - kinfo->spi_subport_uregbase = - (u64) pd->subport_uregbase & MMAP64_MASK; - kinfo->spi_subport_rcvegrbuf = - (u64) pd->subport_rcvegrbuf & MMAP64_MASK; - kinfo->spi_subport_rcvhdr_base = - (u64) pd->subport_rcvhdr_base & MMAP64_MASK; - ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", - kinfo->spi_port, kinfo->spi_runtime_flags, - (unsigned long long) kinfo->spi_subport_uregbase, - (unsigned long long) kinfo->spi_subport_rcvegrbuf, - (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) + sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) ret = -EFAULT; bail: @@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, struct ipath_devdata *dd; void *addr; size_t size; - int ret; + int ret = 0; /* If the port is not shared, all addresses should be physical */ - if (!pd->port_subport_cnt) { - ret = -EINVAL; + if (!pd->port_subport_cnt) goto bail; - } dd = pd->port_dd; size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; /* - * Master has all the slave uregbase, rcvhdrq, and - * rcvegrbufs mmapped. + * Each process has all the subport uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. */ - if (subport == 0) { - unsigned num_slaves = pd->port_subport_cnt - 1; - - if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { - addr = pd->subport_uregbase; - size = PAGE_SIZE * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base; - size = pd->port_rcvhdrq_size * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & - MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf; - size *= num_slaves; - } else { - ret = -EINVAL; - goto bail; - } - } else if (pgaddr == (((u64) pd->subport_uregbase + - PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); - size = PAGE_SIZE; - } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1)) & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1); - size = pd->port_rcvhdrq_size; - } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + - size * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf + size * (subport - 1); - /* rcvegrbufs are read-only on the slave */ - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* - * Don't allow permission to later change to writeable - * with mprotect. - */ - vma->vm_flags &= ~VM_MAYWRITE; + if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { + addr = pd->subport_rcvegrbuf; + size *= pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport)) { + addr = pd->subport_uregbase + PAGE_SIZE * subport; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport; + size = pd->port_rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + + size * subport)) { + addr = pd->subport_rcvegrbuf + size * subport; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; } else { - ret = -EINVAL; goto bail; } len = vma->vm_end - vma->vm_start; @@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; - ret = 0; + ret = 1; bail: return ret; @@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) * Check for kernel virtual addresses first, anything else must * match a HW or memory address. */ - if (pgaddr >= (1ULL<<40)) { - ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + if (ret) { + if (ret > 0) + ret = 0; goto bail; } + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = dd->ipath_pbufsport; piobufs = pd->port_piobufs; } else if (!subport_fp(fp)) { /* caller is the master */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + (dd->ipath_pbufsport % pd->port_subport_cnt); piobufs = pd->port_piobufs + @@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) unsigned slave = subport_fp(fp) - 1; /* caller is a slave */ - ureg = 0; piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } @@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, (void *) dd->ipath_pioavailregs_dma, "pioavail registers"); - else if (subport_fp(fp)) - /* Subports don't mmap the physical receive buffers */ - ret = -EINVAL; else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); else if (pgaddr == (u64) pd->port_rcvhdrq_phys) @@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd, const struct ipath_user_info *uinfo) { int ret = 0; - unsigned num_slaves; + unsigned num_subports; size_t size; - /* Old user binaries don't know about subports */ - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - goto bail; /* * If the user is requesting zero or one port, * skip the subport allocation. */ if (uinfo->spu_subport_cnt <= 1) goto bail; - if (uinfo->spu_subport_cnt > 4) { + + /* Old user binaries don't know about new subport implementation */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { + dev_info(&dd->pcidev->dev, + "Mismatched user minor version (%d) and driver " + "minor version (%d) while port sharing. Ensure " + "that driver and library are from the same " + "release.\n", + (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMINOR); + goto bail; + } + if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { ret = -EINVAL; goto bail; } - num_slaves = uinfo->spu_subport_cnt - 1; - pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + num_subports = uinfo->spu_subport_cnt; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); if (!pd->subport_uregbase) { ret = -ENOMEM; goto bail; } /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE) * num_slaves; + sizeof(u32), PAGE_SIZE) * num_subports; pd->subport_rcvhdr_base = vmalloc(size); if (!pd->subport_rcvhdr_base) { ret = -ENOMEM; @@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd, pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * - num_slaves); + num_subports); if (!pd->subport_rcvegrbuf) { ret = -ENOMEM; goto bail_rhdr; @@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd, pd->port_subport_cnt = uinfo->spu_subport_cnt; pd->port_subport_id = uinfo->spu_subport_id; pd->active_slaves = 1; + set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); + memset(pd->subport_rcvhdr_base, 0, size); + memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_subports); goto bail; bail_rhdr: @@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp, */ if (!cpus_empty(current->cpus_allowed) && !cpus_full(current->cpus_allowed)) { - int ncpus = num_online_cpus(), curcpu = -1; + int ncpus = num_online_cpus(), curcpu = -1, nset = 0; for (i = 0; i < ncpus; i++) if (cpu_isset(i, current->cpus_allowed)) { ipath_cdbg(PROC, "%s[%u] affinity set for " - "cpu %d\n", current->comm, - current->pid, i); + "cpu %d/%d\n", current->comm, + current->pid, i, ncpus); curcpu = i; + nset++; } - if (curcpu != -1) { + if (curcpu != -1 && nset != ncpus) { if (npresent) { prefunit = curcpu / (ncpus / npresent); - ipath_dbg("%s[%u] %d chips, %d cpus, " + ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " "%d cpus/chip, select unit %d\n", current->comm, current->pid, npresent, ncpus, ncpus / npresent, @@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp, const struct ipath_user_info *uinfo) { int ret; - struct ipath_portdata *pd; + struct ipath_portdata *pd = port_fp(fp); struct ipath_devdata *dd; u32 head32; - pd = port_fp(fp); + /* Subports don't need to initialize anything since master did it. */ + if (subport_fp(fp)) { + ret = wait_event_interruptible(pd->port_wait, + !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); + goto done; + } + dd = pd->port_dd; if (uinfo->spu_rcvhdrsize) { @@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp, dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + /* Notify any waiting slaves */ + if (pd->port_subport_cnt) { + clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + wake_up(&pd->port_wait); + } done: return ret; } @@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, return ret; } +static int ipath_force_pio_avail_update(struct ipath_devdata *dd) +{ + u64 reg = dd->ipath_sendctrl; + + clear_bit(IPATH_S_PIOBUFAVAILUPD, ®); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + + return 0; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: copy = sizeof(cmd.cmd.slave_mask_addr); dest = &cmd.cmd.slave_mask_addr; src = &ucmd->cmd.slave_mask_addr; break; + case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg + copy = 0; + src = NULL; + dest = NULL; + break; default: ret = -EINVAL; goto bail; } - if ((count - consumed) < copy) { - ret = -EINVAL; - goto bail; - } + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } - if (copy_from_user(dest, src, copy)) { - ret = -EFAULT; - goto bail; + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + + consumed += copy; } - consumed += copy; pd = port_fp(fp); if (!pd && cmd.type != __IPATH_CMD_USER_INIT && cmd.type != IPATH_CMD_ASSIGN_PORT) { @@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: ret = ipath_get_slave_info(pd, (void __user *) (unsigned long) cmd.cmd.slave_mask_addr); break; + case IPATH_CMD_PIOAVAILUPD: + ret = ipath_force_pio_avail_update(pd->port_dd); + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 5b40a846ff95..ed55979bfd34 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -451,12 +451,18 @@ bail: return ret; } -static void remove_file(struct dentry *parent, char *name) +static int remove_file(struct dentry *parent, char *name) { struct dentry *tmp; + int ret; tmp = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto bail; + } + spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { @@ -469,6 +475,14 @@ static void remove_file(struct dentry *parent, char *name) spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); } + + ret = 0; +bail: + /* + * We don't expect clients to care about the return value, but + * it's there if they need it. + */ + return ret; } static int remove_device_files(struct super_block *sb, diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 7468477ba837..4171198fc202 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -43,6 +43,9 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); + + /* * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. @@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = { .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port(), + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) @@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 + +/* TID entries (memory), HT-only */ +#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ +#define INFINIPATH_RT_VALID 0x8000000000000000ULL +#define INFINIPATH_RT_ADDR_SHIFT 0 +#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL +#define INFINIPATH_RT_BUFSIZE_SHIFT 48 + /* * masks and bits that are different in different chips, or present only * in one @@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) +#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ + << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) + +static int ipath_ht_txe_recover(struct ipath_devdata *); + /** * ipath_ht_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, /* * make sure we get this much out, unless told to be quiet, + * it's a parity error we may recover from, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~(dd->ipath_lasthwerror | - ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || - (ipath_debug & __IPATH_VERBDBG)) + if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | + RXE_EAGER_PARITY)) || + (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; @@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if (ctrl & INFINIPATH_C_FREEZEMODE) { + if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { /* * parity errors in send memory are recoverable, * just cancel the send (if indicated in * sendbuffererror), @@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; + if (hwerrs & RXE_EAGER_PARITY) + ipath_dev_err(dd, "RXE parity, Eager TID error is not " + "recoverable\n"); + if (!hwerrs) { + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, ctrl); @@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs) { + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + * force link down, so switch knows, and + * LEDs are turned off + */ + if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_ht_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); + isfatal = 1; + } + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + /* mark as having had error */ + *dd->ipath_statusp |= IPATH_STATUS_HWERROR; + /* + * mark as not usable, at a minimum until driver + * is reloaded, probably until reboot, since no + * other reset is possible. + */ + dd->ipath_flags &= ~IPATH_INITTED; + } + else + *msg = 0; /* recovered from all of them */ + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* * for status file; if no trailing brace is copied, @@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (n) snprintf(name, namelen, "%s", n); - if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { + if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || + dd->ipath_minrev > 3)) { /* * This version of the driver only supports Rev 3.2 and 3.3 */ @@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) + ipath_dbg("MemBIST corrected\n"); ipath_check_htlink(dd); @@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, u32 type, unsigned long pa) { + if (!dd->ipath_kregbase) + return; + if (pa != dd->ipath_tidinvalid) { if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { dev_info(&dd->pcidev->dev, @@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, pa |= lenvalid | INFINIPATH_RT_VALID; } } - if (dd->ipath_kregbase) - writeq(pa, tidptr); + writeq(pa, tidptr); } + /** * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager * @dd: the infinipath device @@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) INFINIPATH_S_ABORT); ipath_get_eeprom_info(dd); - if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && + if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* * Later production QHT7040 has same changes as QHT7140, so @@ -1528,13 +1548,31 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) return 0; } + +static int ipath_ht_txe_recover(struct ipath_devdata *dd) +{ + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + + /** * ipath_init_ht_get_base_info - set chip-specific flags for user code * @dd: the infinipath device * @kbase: ipath_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithims. + * HyperTransport can affect some user packet algorithms. */ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) { diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index ae8bf9950c6d..1b9c30857754 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -43,6 +43,8 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64); + /* * This file contains all the chip-specific register information and * access functions for the QLogic InfiniPath PCI-Express chip. @@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port() + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), @@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) + +static int ipath_pe_txe_recover(struct ipath_devdata *); + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } + if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; if (hwerrs) { /* * if any set that we aren't ignoring only make the * complaint once, in case it's stuck or recurring, * and we get here multiple times + * Force link down, so switch knows, and + * LEDs are turned off */ if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_pe_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); ipath_dev_err(dd, "Fatal Hardware Error (freeze " "mode), no longer usable, SN %.16s\n", dd->ipath_serial); @@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* * for /sys status file ; if no trailing } is copied, we'll @@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND) + ipath_dbg("MemBIST corrected\n"); val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ @@ -1293,7 +1293,7 @@ int __attribute__((weak)) ipath_unordered_wc(void) * @kbase: ipath_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithims. + * HyperTransport can affect some user packet algorithms. */ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) { @@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) dd->ipath_irq = 0; } +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip bug can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. + * Because we can get lots of false errors, we have no upper limit + * on recovery attempts on those platforms. + */ +static int ipath_pe_txe_recover(struct ipath_devdata *dd) +{ + if (ipath_unordered_wc()) + ipath_dbg("Recovering from TXE PIO parity error\n"); + else { + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + } + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index d4f6b5239ef8..7045ba689494 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd) return ret; } +static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) +{ + struct ipath_portdata *pd = NULL; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (pd) { + pd->port_dd = dd; + pd->port_cnt = 1; + /* The port 0 pkey table is used by the layer interface. */ + pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + } + return pd; +} + static int init_chip_first(struct ipath_devdata *dd, struct ipath_portdata **pdp) { @@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd, goto done; } - dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); + pd = create_portdata0(dd); - if (!dd->ipath_pd[0]) { + if (!pd) { ipath_dev_err(dd, "Unable to allocate portdata for port " "0, failing\n"); ret = -ENOMEM; goto done; } - pd = dd->ipath_pd[0]; - pd->port_dd = dd; - pd->port_port = 0; - pd->port_cnt = 1; - /* The port 0 pkey table is used by the layer interface. */ - pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + dd->ipath_pd[0] = pd; + dd->ipath_rcvtidcnt = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); dd->ipath_rcvtidbase = @@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd, goto done; } + + /* clear diagctrl register, in case diags were running and crashed */ + ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); + /* clear the initial reset flag, in case first driver load */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, INFINIPATH_E_RESET); @@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) { int ret = 0, i; u32 val32, kpiobufs; + u32 piobufs, uports; u64 val; struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ gfp_t gfp_flags = GFP_USER | __GFP_COMP; @@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * the in memory DMA'ed copies of the registers. This has to * be done early, before we calculate lastport, etc. */ - val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; /* * calc number of pioavail registers, and save it; we have 2 * bits per buffer. */ - dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) + dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); + uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; if (ipath_kpiobufs == 0) { /* not set by user (this is default) */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) + if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) kpiobufs = 32; else kpiobufs = 16; @@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) else kpiobufs = ipath_kpiobufs; - if (kpiobufs > - (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { - i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT); + if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { + i = (int) piobufs - + (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 0) i = 0; - dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " - "kernel leaves too few for %d user ports " + dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " + "%d for kernel leaves too few for %d user ports " "(%d each); using %u\n", kpiobufs, - dd->ipath_cfgports - 1, - IPATH_MIN_USER_PORT_BUFCNT, i); + piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); /* * shouldn't change ipath_kpiobufs, because could be * different for different devices... */ kpiobufs = i; } - dd->ipath_lastport_piobuf = - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; - dd->ipath_pbufsport = dd->ipath_cfgports > 1 - ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) - : 0; - val32 = dd->ipath_lastport_piobuf - - (dd->ipath_pbufsport * (dd->ipath_cfgports - 1)); + dd->ipath_lastport_piobuf = piobufs - kpiobufs; + dd->ipath_pbufsport = + uports ? dd->ipath_lastport_piobuf / uports : 0; + val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); if (val32 > 0) { ipath_dbg("allocating %u pbufs/port leaves %u unused, " "add to kernel\n", dd->ipath_pbufsport, val32); @@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, - dd->ipath_pbufsport, dd->ipath_cfgports - 1); + piobufs, dd->ipath_pbufsport, uports); dd->ipath_f_early_init(dd); @@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing * re-init, the simplest way to handle this is to free * existing, and re-allocate. + * Need to re-create rest of port 0 portdata as well. */ if (reinit) { - struct ipath_portdata *pd = dd->ipath_pd[0]; - dd->ipath_pd[0] = NULL; - ipath_free_pddata(dd, pd); + /* Alloc and init new ipath_portdata for port0, + * Then free old pd. Could lead to fragmentation, but also + * makes later support for hot-swap easier. + */ + struct ipath_portdata *npd; + npd = create_portdata0(dd); + if (npd) { + ipath_free_pddata(dd, pd); + dd->ipath_pd[0] = pd = npd; + } else { + ipath_dev_err(dd, "Unable to allocate portdata for" + " port 0, failing\n"); + ret = -ENOMEM; + goto done; + } } dd->ipath_f_tidtemplate(dd); ret = ipath_create_rcvhdrq(dd, pd); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 72b9e279d19d..45d033169c6e 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -38,10 +38,39 @@ #include "ipath_common.h" /* + * clear (write) a pio buffer, to clear a parity error. This routine + * should only be called when in freeze mode, and the buffer should be + * canceled afterwards. + */ +static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) +{ + u32 __iomem *pbuf; + u32 dwcnt; /* dword count to write */ + if (pnum < dd->ipath_piobcnt2k) { + pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * + dd->ipath_palign); + dwcnt = dd->ipath_piosize2k >> 2; + } + else { + pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + + (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); + dwcnt = dd->ipath_piosize4k >> 2; + } + dev_info(&dd->pcidev->dev, + "Rewrite PIO buffer %u, to recover from parity error\n", + pnum); + *pbuf = dwcnt+1; /* no flush required, since already in freeze */ + while(--dwcnt) + *pbuf++ = 0; +} + +/* * Called when we might have an error that is specific to a particular * PIO buffer, and may need to cancel that buffer, so it can be re-used. + * If rewrite is true, and bits are set in the sendbufferror registers, + * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; @@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) } for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) + if (test_bit(i, sbuf)) { + if (rewrite) + ipath_clrpiobuf(dd, i); ipath_disarm_piobufs(dd, i, 1); + } dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ } } @@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { u64 ignore_this_time = 0; - ipath_disarm_senderrbufs(dd); + ipath_disarm_senderrbufs(dd, 0); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * happens so often we never want to count it. */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { - ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); + int iserr; + iserr = ipath_decode_err(msg, sizeof msg, + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); if (dd->ipath_lasterror & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) ipath_dev_err(dd, "Suppressed %u messages for " "fast-repeating errors (%s) (%llx)\n", supp_msgs, msg, @@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * them. So only complain about these at debug * level. */ - ipath_dbg("Suppressed %u messages for %s\n", - supp_msgs, msg); + if (iserr) + ipath_dbg("Suppressed %u messages for %s\n", + supp_msgs, msg); + else + ipath_cdbg(ERRPKT, + "Suppressed %u messages for %s\n", + supp_msgs, msg); } } } @@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { char msg[512]; u64 ignore_this_time = 0; - int i; + int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; @@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } if (supp_msgs == 250000) { + int s_iserr; /* * It's not entirely reasonable assuming that the errors set * in the last clear period are all responsible for the @@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_maskederrs |= dd->ipath_lasterror | errs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ~dd->ipath_maskederrs); - ipath_decode_err(msg, sizeof msg, + s_iserr = ipath_decode_err(msg, sizeof msg, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) - ipath_dev_err(dd, "Disabling error(s) %llx because " - "occurring too frequently (%s)\n", - (unsigned long long) - (dd->ipath_maskederrs & - ~dd->ipath_ignorederrs), msg); + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + ipath_dev_err(dd, "Temporarily disabling " + "error(s) %llx reporting; too frequent (%s)\n", + (unsigned long long) (dd->ipath_maskederrs & + ~dd->ipath_ignorederrs), msg); else { /* * rcvegrfull and rcvhdrqfull are "normal", @@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * processing them. So only complain about * these at debug level. */ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", msg); + if (s_iserr) + ipath_dbg("Temporarily disabling reporting " + "too frequent queue full errors (%s)\n", + msg); + else + ipath_cdbg(ERRPKT, + "Temporarily disabling reporting too" + " frequent packet errors (%s)\n", + msg); } /* @@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ipath_stats.sps_crcerrs++; chkerrpkts = 1; } + iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); + /* * We don't want to print these two as they happen, or we can make @@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; } - if (!noprint && *msg) - ipath_dev_err(dd, "%s error\n", msg); + if (!noprint && *msg) { + if (iserr) + ipath_dev_err(dd, "%s error\n", msg); + else + dev_info(&dd->pcidev->dev, "%s packet problems\n", + msg); + } if (dd->ipath_state_wanted & dd->ipath_flags) { ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " "waking\n", dd->ipath_state_wanted, @@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - int rcbit; clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; - clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); + clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); wake_up_interruptible(&pd->port_wait); rcvdint = 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 6d8d05fb5999..e900c2593f44 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); -void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, @@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); #define IPATH_PORT_WAITING_RCV 2 /* waiting for a PIO buffer to be available */ #define IPATH_PORT_WAITING_PIO 3 + /* master has not finished initializing */ +#define IPATH_PORT_MASTER_UNINIT 4 /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); @@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); +void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * number of words used for protocol header if not set by ipath_userinit(); @@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); /* these are used for the registers that vary with port */ void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, unsigned, u64); -u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg, - unsigned); /* * We could have a single register get/put routine, that takes a group type, @@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); extern unsigned ipath_debug; /* debugging bit mask */ +#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */ + const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 851763d7d2db..dd487c100f5b 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - ipath_dbg(KERN_INFO "LKEY table full\n"); + ipath_dbg("LKEY table full\n"); ret = 0; goto bail; } @@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, * being reversible by calling bus_to_virt(). */ if (sge->lkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } isge->mr = NULL; isge->vaddr = (void *) sge->addr; isge->length = sge->length; @@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } sge->mr = NULL; sge->vaddr = (void *) vaddr; sge->length = len; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 8cc8598d6c69..31e70732e369 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, m = 0; n = 0; list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (i = 0; i < chunk->nmap; i++) { - mr->mr.map[m]->segs[n].vaddr = - page_address(chunk->page_list[i].page); + for (i = 0; i < chunk->nents; i++) { + void *vaddr; + + vaddr = page_address(chunk->page_list[i].page); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = region->page_size; n++; if (n == IPATH_SEGSZ) { diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 64f07b19349f..16db9ac0b402 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -81,11 +81,51 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static u32 alloc_qpn(struct ipath_qp_table *qpt) + +static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long flags; + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock_irqsave(&qpt->lock, flags); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock_irqrestore(&qpt->lock, flags); +} + + +static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) { u32 i, offset, max_scan, qpn; struct qpn_map *map; - u32 ret; + u32 ret = -1; + + if (type == IB_QPT_SMI) + ret = 0; + else if (type == IB_QPT_GSI) + ret = 1; + + if (ret != -1) { + map = &qpt->map[0]; + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) { + ret = -ENOMEM; + goto bail; + } + } + if (!test_and_set_bit(ret, map->page)) + atomic_dec(&map->n_free); + else + ret = -EBUSY; + goto bail; + } qpn = qpt->last + 1; if (qpn >= QPN_MAX) @@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us - * installing it: - */ - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) qpn = mk_qpn(qpt, map, offset); } - ret = 0; + ret = -ENOMEM; bail: return ret; @@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, enum ib_qp_type type) { unsigned long flags; - u32 qpn; int ret; - if (type == IB_QPT_SMI) - qpn = 0; - else if (type == IB_QPT_GSI) - qpn = 1; - else { - /* Allocate the next available QPN */ - qpn = alloc_qpn(qpt); - if (qpn == 0) { - ret = -ENOMEM; - goto bail; - } - } - qp->ibqp.qp_num = qpn; + ret = alloc_qpn(qpt, type); + if (ret < 0) + goto bail; + qp->ibqp.qp_num = ret; /* Add the QP to the hash table. */ spin_lock_irqsave(&qpt->lock, flags); - qpn %= qpt->max; - qp->next = qpt->table[qpn]; - qpt->table[qpn] = qp; + ret %= qpt->max; + qp->next = qpt->table[ret]; + qpt->table[ret] = qp; atomic_inc(&qp->refcount); spin_unlock_irqrestore(&qpt->lock, flags); @@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) if (!fnd) return; - /* If QPN is not reserved, mark QPN free in the bitmap. */ - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); wait_event(qp->wait, !atomic_read(&qp->refcount)); } @@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) while (qp) { nqp = qp->next; - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg(KERN_INFO "QP memory leak!\n"); + ipath_dbg("QP memory leak!\n"); qp = nqp; } } @@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - clear_bit(IPATH_S_BUSY, &qp->s_flags); + qp->s_busy = 0; + qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_psn = 0; qp->r_psn = 0; @@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->r_state = IB_OPCODE_UC_SEND_LAST; } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; @@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_ssn = 1; qp->s_lsn = 0; qp->s_wait_credit = 0; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * QP s_lock should be held and interrupts disabled. + * The QP s_lock should be held and interrupts disabled. */ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + ipath_dbg("QP%d/%d in error state\n", qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); @@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.port_num = 0; if (qp->r_wrid_valid) { qp->r_wrid_valid = 0; + wc.wr_id = qp->r_wr_id; + wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); } @@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->path_mig_state != IB_MIG_REARM) goto inval; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); break; case IB_QPS_ERR: - ipath_error_qp(qp, IB_WC_GENERAL_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + qp->state = new_state; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; attr->sq_draining = 0; - attr->max_rd_atomic = 1; - attr->max_dest_rd_atomic = 1; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; attr->timeout = qp->timeout; @@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; else init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; @@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + qp->s_flags = IPATH_S_SIGNAL_REQ_WR; else qp->s_flags = 0; dev = to_idev(ibpd->device); @@ -958,7 +989,7 @@ bail: * @wc: the WC responsible for putting the QP in this state * * Flushes the send work queue. - * The QP s_lock should be held. + * The QP s_lock should be held and interrupts disabled. */ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) @@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + ipath_dbg("Send queue error on QP%d/%d: err: %d\n", qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); @@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) wc->status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { + wqe = get_swqe_ptr(qp, qp->s_last); wc->wr_id = wqe->wr.wr_id; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); if (++qp->s_last >= qp->s_size) qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); } qp->s_cur = qp->s_tail = qp->s_head; qp->state = IB_QPS_SQE; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb04494..b4b88d0b53f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -37,6 +37,19 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x +static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ipath_skip_sge(ss, len); + return wqe->length - len; +} + /** * ipath_init_restart- initialize the qp->s_sge after a restart * @qp: the QP who's SGE we're restarting @@ -47,15 +60,9 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) { struct ipath_ibdev *dev; - u32 len; - len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - ipath_skip_sge(&qp->s_sge, len); - qp->s_len = wqe->length - len; + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, + ib_mtu_enum_to_int(qp->path_mtu)); dev = to_idev(qp->ibqp.device); spin_lock(&dev->pending_lock); if (list_empty(&qp->timerwait)) @@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) * @ohdr: a pointer to the IB header being constructed * @pmtu: the path MTU * - * Return bth0 if constructed; otherwise, return 0. + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. */ -u32 ipath_make_rc_ack(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu) +static int ipath_make_rc_ack(struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 pmtu, u32 *bth0p, u32 *bth2p) { + struct ipath_ack_entry *e; u32 hwords; u32 len; u32 bth0; + u32 bth2; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - /* - * Send a response. Note that we are in the responder's - * side of the QP context. - */ switch (qp->s_ack_state) { - case OP(RDMA_READ_REQUEST): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; - if (len > pmtu) { - len = pmtu; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else - qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - qp->s_rdma_len -= len; + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + case OP(ATOMIC_ACKNOWLEDGE): + qp->s_ack_state = OP(ACKNOWLEDGE); + /* FALLTHROUGH */ + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & IPATH_S_ACK_PENDING) + goto normal; + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* Copy SGE state in case we need to resend */ + qp->s_ack_rdma_sge = e->rdma_sge; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + len = e->rdma_sge.sge.sge_length; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else { + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + if (++qp->s_tail_ack_queue > + IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + } + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = ipath_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn; + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + } bth0 = qp->s_ack_state << 24; - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; break; case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; + len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; else { ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; } - qp->s_rdma_len -= len; bth0 = qp->s_ack_state << 24; - break; - - case OP(RDMA_READ_RESPONSE_LAST): - case OP(RDMA_READ_RESPONSE_ONLY): - /* - * We have to prevent new requests from changing - * the r_sge state while a ipath_verbs_send() - * is in progress. - */ - qp->s_ack_state = OP(ACKNOWLEDGE); - bth0 = 0; - goto bail; - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - qp->s_cur_sge = NULL; - len = 0; - /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. - */ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.aeth = ipath_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at) / 4; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; break; default: - /* Send a regular ACK. */ - qp->s_cur_sge = NULL; - len = 0; + normal: /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). */ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ACKNOWLEDGE) << 24; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + qp->s_flags &= ~IPATH_S_ACK_PENDING; + qp->s_cur_sge = NULL; if (qp->s_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->s_nak_state << - IPATH_AETH_CREDIT_SHIFT)); + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | + (qp->s_nak_state << + IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & IPATH_PSN_MASK; } qp->s_hdrwords = hwords; qp->s_cur_size = len; + *bth0p = bth0; + *bth2p = bth2; + return 1; bail: - return bth0; + return 0; } /** @@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, u32 bth2; char newreq; + /* Sending responses has higher priority over sending requests. */ + if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && + ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) + goto done; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || qp->s_rnr_timeout) - goto done; + goto bail; /* Limit the number of packets sent without an ACK. */ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { @@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); - goto done; + goto bail; } /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_tail) { /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) - goto done; + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= IPATH_S_FENCE_PENDING; + goto bail; + } wqe->psn = qp->s_next_psn; newreq = 1; } @@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / 4; + hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, break; case IB_WR_RDMA_READ: - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; /* @@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); ss = NULL; len = 0; if (++qp->s_cur == qp->s_size) @@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) - qp->s_state = OP(COMPARE_SWAP); - else - qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.vaddr = cpu_to_be64( - wqe->wr.wr.atomic.remote_addr); - ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - hwords += sizeof(struct ib_atomic_eth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; break; default: - goto done; + goto bail; } qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; @@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_psn = wqe->lpsn + 1; else { qp->s_psn++; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; } /* @@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp, cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = NULL; len = 0; @@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_size) qp->s_cur = 0; break; - - case OP(RDMA_READ_REQUEST): - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - /* - * We shouldn't start anything new until this request is - * finished. The ACK will handle rescheduling us. XXX The - * number of outstanding ones is negotiated at connection - * setup time (see pg. 258,289)? XXX Also, if we support - * multiple outstanding requests, we need to check the WQE - * IB_SEND_FENCE flag and not send a new request if a RDMA - * read or atomic is pending. - */ - goto done; } if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) bth2 |= 1 << 31; /* Request ACK. */ @@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_cur_size = len; *bth0p = bth0 | (qp->s_state << 24); *bth2p = bth2; +done: return 1; -done: +bail: return 0; } @@ -524,7 +576,8 @@ done: * * This is called from ipath_rc_rcv() and only uses the receive * side QP state. - * Note that RDMA reads are handled in the send side QP state and tasklet. + * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. */ static void send_rc_ack(struct ipath_qp *qp) { @@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp) struct ipath_ib_header hdr; struct ipath_other_headers *ohdr; + /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ + if (qp->r_head_ack_queue != qp->s_tail_ack_queue) + goto queue_ack; + /* Construct the header. */ ohdr = &hdr.u.oth; lrh0 = IPATH_LRH_BTH; @@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | + OP(ACKNOWLEDGE) << 24; if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); - if (qp->r_ack_state >= OP(COMPARE_SWAP)) { - bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; - } else - bth0 |= OP(ACKNOWLEDGE) << 24; lrh0 |= qp->remote_ah_attr.sl << 4; hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); @@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp) * If we can send the ACK, clear the ACK state. */ if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { - qp->r_ack_state = OP(ACKNOWLEDGE); dev->n_unicast_xmit++; - } else { - /* - * We are out of PIO buffers at the moment. - * Pass responsibility for sending the ACK to the - * send tasklet so that when a PIO buffer becomes - * available, the ACK is sent ahead of other outgoing - * packets. - */ - dev->n_rc_qacks++; - spin_lock_irq(&qp->s_lock); - /* Don't coalesce if a RDMA read or atomic is pending. */ - if (qp->s_ack_state == OP(ACKNOWLEDGE) || - qp->s_ack_state < OP(RDMA_READ_REQUEST)) { - qp->s_ack_state = qp->r_ack_state; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - qp->r_ack_state = OP(ACKNOWLEDGE); - } - spin_unlock_irq(&qp->s_lock); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + goto done; } + + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + dev->n_rc_qacks++; + +queue_ack: + spin_lock_irq(&qp->s_lock); + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + spin_unlock_irq(&qp->s_lock); + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + +done: + return; } /** @@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) if (wqe->wr.opcode == IB_WR_RDMA_READ) dev->n_rc_resends++; else - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); tasklet_hi_schedule(&qp->s_task); @@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); - /* Nothing is pending to ACK/NAK. */ - if (unlikely(qp->s_last == qp->s_tail)) - goto bail; - /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && (opcode != OP(RDMA_READ_RESPONSE_LAST) || - ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || + ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || @@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } - if (wqe->wr.opcode == IB_WR_RDMA_READ || - wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - tasklet_hi_schedule(&qp->s_task); + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if ((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) { + qp->s_flags &= ~IPATH_S_FENCE_PENDING; + tasklet_hi_schedule(&qp->s_task); + } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { + qp->s_flags &= ~IPATH_S_RDMAR_PENDING; + tasklet_hi_schedule(&qp->s_task); + } + } /* Post a send completion queue entry if requested. */ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = wqe->length; + wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; @@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) if (qp->s_last == qp->s_cur) { if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; + qp->s_last = qp->s_cur; + if (qp->s_last == qp->s_tail) + break; wqe = get_swqe_ptr(qp, qp->s_cur); qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; + } else { + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + if (qp->s_last == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, qp->s_last); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - if (qp->s_last == qp->s_tail) - break; } switch (aeth >> 29) { @@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); + /* + * If we get a partial ACK for a resent operation, + * we can stop resending the earlier packets and + * continue with the next packet the receiver wants. + */ + if (ipath_cmp24(qp->s_psn, psn) <= 0) { + reset_psn(qp, psn + 1); + tasklet_hi_schedule(&qp->s_task); + } + } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; } ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; @@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 1: /* RNR NAK */ dev->n_rnr_naks++; + if (qp->s_last == qp->s_tail) + goto bail; if (qp->s_rnr_retry == 0) { - if (qp->s_last == qp->s_tail) - goto bail; - wc.status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) qp->s_rnr_retry--; - if (qp->s_last == qp->s_tail) - goto bail; /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + dev->n_rc_resends++; + else + dev->n_rc_resends += + (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); @@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; case 3: /* NAK */ - /* The last valid PSN seen is the previous request's. */ - if (qp->s_last != qp->s_tail) - update_last_psn(qp, wqe->psn - 1); + if (qp->s_last == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ dev->n_seq_naks++; /* - * Back up to the responder's expected PSN. XXX + * Back up to the responder's expected PSN. * Note that we might get a NAK in the middle of an * RDMA READ response which terminates the RDMA * READ. */ - if (qp->s_last == qp->s_tail) - break; - - if (ipath_cmp24(psn, wqe->psn) < 0) - break; - - /* Retry the request. */ ipath_restart_rc(qp, psn, &wc); break; @@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, u32 psn, u32 hdrsize, u32 pmtu, int header_in_data) { + struct ipath_swqe *wqe; unsigned long flags; struct ib_wc wc; int diff; @@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } + if (unlikely(qp->s_last == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_last); + switch (opcode) { case OP(ACKNOWLEDGE): case OP(ATOMIC_ACKNOWLEDGE): @@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) - *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + u64 val; + + if (!header_in_data) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = be64_to_cpu(((__be64 *) data)[0]); + *(u64 *) wqe->sg_list[0].vaddr = val; + } if (!do_rc_ack(qp, aeth, psn, opcode) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; /* - * do_rc_ack() has already checked the PSN so skip - * the sequence check. + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. */ - goto rdma_read; + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; case OP(RDMA_READ_RESPONSE_MIDDLE): /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - rdma_read: - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + read_middle: if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto ack_done; - if (unlikely(pmtu >= qp->s_len)) - goto ack_done; + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + /* We got a response so update the timeout. */ - if (unlikely(qp->s_last == qp->s_tail || - get_swqe_ptr(qp, qp->s_last)->wr.opcode != - IB_WR_RDMA_READ)) - goto ack_done; spin_lock(&dev->pending_lock); if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) list_move_tail(&qp->timerwait, @@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* * Update the RDMA receive state but do the copy w/o * holding the locks and blocking interrupts. - * XXX Yet another place that affects relaxed RDMA order - * since we don't want s_sge modified. */ - qp->s_len -= pmtu; + qp->s_rdma_read_len -= pmtu; update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_sge, data, pmtu); + ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); goto bail; - case OP(RDMA_READ_RESPONSE_LAST): - /* ACKs READ req. */ + case OP(RDMA_READ_RESPONSE_ONLY): if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - /* FALLTHROUGH */ - case OP(RDMA_READ_RESPONSE_ONLY): - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; /* - * Get the number of bytes the message was padded by. + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { + dev->n_rdma_seq++; + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + goto ack_done; + } + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for the AETH header (4) and * ICRC (4). */ - if (unlikely(tlen <= (hdrsize + pad + 8))) { - /* XXX Need to generate an error CQ entry. */ - goto ack_done; - } + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; + read_last: tlen -= hdrsize + pad + 8; - if (unlikely(tlen != qp->s_len)) { - /* XXX Need to generate an error CQ entry. */ - goto ack_done; - } + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; if (!header_in_data) aeth = be32_to_cpu(ohdr->u.aeth); else { aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - ipath_copy_sge(&qp->s_sge, data, tlen); - if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { - /* - * Change the state so we contimue - * processing new requests and wake up the - * tasklet if there are posted sends. - */ - qp->s_state = OP(SEND_LAST); - if (qp->s_tail != qp->s_head) - tasklet_hi_schedule(&qp->s_task); - } + ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); + (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); goto ack_done; } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + +ack_op_err: + wc.status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_len_err: + wc.status = IB_WC_LOC_LEN_ERR; +ack_err: + wc.wr_id = wqe->wr.wr_id; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.dlid_path_bits = 0; + wc.port_num = 0; + ipath_sqerror_qp(qp, &wc); bail: return; } @@ -1162,7 +1280,7 @@ bail: * incoming RC packet for the given QP. * Called at interrupt level. * Return 1 if no more processing is needed; otherwise return 0 to - * schedule a response to be sent and the s_lock unlocked. + * schedule a response to be sent. */ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, struct ipath_other_headers *ohdr, @@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, int diff, int header_in_data) { - struct ib_reth *reth; + struct ipath_ack_entry *e; + u8 i, prev; + int old_req; if (diff > 0) { /* * Packet sequence error. * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or - * NAK is pending though. + * Don't queue the NAK if we already sent one. */ - if (qp->s_ack_state != OP(ACKNOWLEDGE) || - qp->r_nak_state != 0) - goto done; - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = OP(SEND_ONLY); + if (!qp->r_nak_state) { qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. */ qp->r_ack_psn = qp->r_psn; + goto send_ack; } - goto send_ack; + goto done; } /* @@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * can coalesce an outstanding duplicate ACK. We have to * send the earliest so that RDMA reads can be restarted at * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. */ - if (opcode == OP(RDMA_READ_REQUEST)) { + psn &= IPATH_PSN_MASK; + e = NULL; + old_req = 1; + spin_lock_irq(&qp->s_lock); + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = IPATH_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (ipath_cmp24(psn, e->psn) >= 0) + break; + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) + goto unlock_done; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, data += sizeof(*reth); } /* - * If we receive a duplicate RDMA request, it means the - * requester saw a sequence error and needs to restart - * from an earlier point. We can abort the current - * RDMA read send in that case. + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. */ - spin_lock_irq(&qp->s_lock); - if (qp->s_ack_state != OP(ACKNOWLEDGE) && - (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { - /* - * We are already sending earlier requested data. - * Don't abort it to send later out of sequence data. - */ - spin_unlock_irq(&qp->s_lock); - goto done; - } - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + offset = ((psn - e->psn) & IPATH_PSN_MASK) * + ib_mtu_enum_to_int(qp->path_mtu); + len = be32_to_cpu(reth->length); + if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) + goto unlock_done; + if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; - /* - * Address range must be a subset of the original - * request and start on pmtu boundaries. - */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, + ok = ipath_rkey_ok(qp, &e->rdma_sge, + len, vaddr, rkey, IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); - goto done; - } + if (unlikely(!ok)) + goto unlock_done; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - tasklet_hi_schedule(&qp->s_task); - goto send_ack; + e->psn = psn; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; } - /* - * A pending RDMA read will ACK anything before it so - * ignore earlier duplicate requests. - */ - if (qp->s_ack_state != OP(ACKNOWLEDGE)) - goto done; - - /* - * If an ACK is pending, don't replace the pending ACK - * with an earlier one since the later one will ACK the earlier. - * Also, if we already have a pending atomic, send it. - */ - if (qp->r_ack_state != OP(ACKNOWLEDGE) && - (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || - qp->r_ack_state >= OP(COMPARE_SWAP))) - goto send_ack; - switch (opcode) { case OP(COMPARE_SWAP): - case OP(FETCH_ADD): + case OP(FETCH_ADD): { /* - * Check for the PSN of the last atomic operation - * performed and resend the result if found. + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. */ - if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) - goto done; + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; + } + + default: + if (old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irq(&qp->s_lock); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. + */ + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = i; break; } - qp->r_ack_state = opcode; qp->r_nak_state = 0; - qp->r_ack_psn = psn; -send_ack: - return 0; + spin_unlock_irq(&qp->s_lock); + tasklet_hi_schedule(&qp->s_task); +unlock_done: + spin_unlock_irq(&qp->s_lock); done: return 1; + +send_ack: + return 0; } static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) @@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; nack_inv: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. - */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * Don't queue the NAK if a RDMA read or atomic * is pending though. */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; - qp->r_ack_state = OP(SEND_ONLY); + if (qp->r_nak_state) + goto done; qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto rnr_nak; goto send_last_imm; - case OP(RDMA_READ_REQUEST): + case OP(RDMA_READ_REQUEST): { + struct ipath_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; + e = &qp->s_ack_queue[qp->r_head_ack_queue]; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, reth = (struct ib_reth *)data; data += sizeof(*reth); } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto nack_acc; - spin_lock_irq(&qp->s_lock); - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + len = be32_to_cpu(reth->length); + if (len) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); + ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) goto nack_acc; - } /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (qp->s_rdma_len > pmtu) - qp->r_psn += (qp->s_rdma_len - 1) / pmtu; + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } + e->opcode = opcode; + e->psn = psn; /* * We need to increment the MSN here instead of when we * finish sending the result since a duplicate request would * increment it more than once. */ qp->r_msn++; - - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; /* Call ipath_do_rc_send() in another thread. */ tasklet_hi_schedule(&qp->s_task); goto done; + } case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; + struct ipath_ack_entry *e; u64 vaddr; + atomic64_t *maddr; u64 sdata; u32 rkey; + u8 next; + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; if (!header_in_data) ateth = &ohdr->u.atomic_eth; - else { + else ateth = (struct ib_atomic_eth *)data; - data += sizeof(*ateth); - } - vaddr = be64_to_cpu(ateth->vaddr); + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv; rkey = be32_to_cpu(ateth->rkey); @@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); - spin_lock_irq(&dev->pending_lock); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (opcode == OP(FETCH_ADD)) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == - be64_to_cpu(ateth->compare_data)) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irq(&dev->pending_lock); + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + e->opcode = opcode; + e->psn = psn & IPATH_PSN_MASK; qp->r_msn++; - qp->r_atomic_psn = psn & IPATH_PSN_MASK; - psn |= 1 << 31; - break; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + + goto done; } default: - /* Drop packet for unknown opcodes. */ - goto done; + /* NAK unknown opcodes. */ + goto nack_inv; } qp->r_psn++; qp->r_state = opcode; + qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & (1 << 31)) { - /* - * Coalesce ACKs unless there is a RDMA READ or - * ATOMIC pending. - */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = opcode; - qp->r_ack_psn = psn; - } + if (psn & (1 << 31)) goto send_ack; - } goto done; nack_acc: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. - */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); - qp->r_ack_state = OP(RDMA_WRITE_ONLY); - qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; - qp->r_ack_psn = qp->r_psn; - } + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; + send_ack: - /* Send ACK right away unless the send tasklet has a pending ACK. */ - if (qp->s_ack_state == OP(ACKNOWLEDGE)) - send_rc_ack(qp); + send_rc_ack(qp); done: return; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index dffc76016d3c..c182bcd62098 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -126,9 +126,18 @@ #define INFINIPATH_E_RESET 0x0004000000000000ULL #define INFINIPATH_E_HARDWARE 0x0008000000000000ULL +/* + * this is used to print "common" packet errors only when the + * __IPATH_ERRPKTDBG bit is set in ipath_debug. + */ +#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ + | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ + | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ + | INFINIPATH_E_REBP ) + /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo - * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID + * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID * bit 4: flag buffer, 5: datainfo, 6: header info */ #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 @@ -143,8 +152,8 @@ /* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ #define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL @@ -299,13 +308,6 @@ #define INFINIPATH_XGXS_RX_POL_SHIFT 19 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL -#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ - -/* TID entries (memory), HT-only */ -#define INFINIPATH_RT_VALID 0x8000000000000000ULL -#define INFINIPATH_RT_ADDR_SHIFT 0 -#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF -#define INFINIPATH_RT_BUFSIZE_SHIFT 48 /* * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index e86cb171872e..d9c2a9b15d86 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; + qp->r_wrid_valid = 1; if (handler) { u32 n; @@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } } spin_unlock_irqrestore(&rq->lock, flags); - qp->r_wrid_valid = 1; bail: return ret; @@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) unsigned long flags; struct ib_wc wc; u64 sdata; + atomic64_t *maddr; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { @@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || + qp->s_rnr_timeout) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } @@ -310,7 +312,7 @@ again: sqp->s_rnr_retry--; dev->n_rnr_naks++; sqp->s_rnr_timeout = - ib_ipath_rnr_table[sqp->r_min_rnr_timer]; + ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); goto done; } @@ -343,20 +345,22 @@ again: wc.sl = sqp->remote_ah_attr.sl; wc.dlid_path_bits = 0; wc.port_num = 0; + spin_lock_irqsave(&sqp->s_lock, flags); ipath_sqerror_qp(sqp, &wc); + spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } break; case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto acc_err; qp->r_sge.sge = wqe->sg_list[0]; qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; @@ -364,22 +368,22 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ - sdata = wqe->wr.wr.atomic.swap; - spin_lock_irqsave(&dev->pending_lock, flags); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irqrestore(&dev->pending_lock, flags); - *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); goto send_comp; default: @@ -440,7 +444,7 @@ again: send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || + if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; @@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) * We clear the tasklet flag now since we are committing to return * from the tasklet function. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); tasklet_unlock(&qp->s_task); want_buffer(dev->dd); dev->n_piowait++; @@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) wr->sg_list[0].addr & (sizeof(u64) - 1))) { ret = -EINVAL; goto bail; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + ret = -EINVAL; + goto bail; } /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { @@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data) u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); struct ipath_other_headers *ohdr; - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) + if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) goto bail; if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { @@ -683,19 +690,15 @@ again: */ spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ - if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && - (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) - bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK; - else if (!((qp->ibqp.qp_type == IB_QPT_RC) ? - ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : - ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { + if (!((qp->ibqp.qp_type == IB_QPT_RC) ? + ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : + ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { /* * Clear the busy bit before unlocking to avoid races with * adding new work queue items and then failing to process * them. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); spin_unlock_irqrestore(&qp->s_lock, flags); goto bail; } @@ -728,7 +731,7 @@ again: goto again; clear: - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); bail: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 30a825928fcf..9307f7187ca5 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque) * don't access the chip while running diags, or memory diags can * fail */ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || ipath_diag_inuse) /* but re-arm the timer, for diags case; won't hurt other */ goto done; @@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque) if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; - ipath_decode_err(ebuf, sizeof ebuf, + int iserr; + iserr = ipath_decode_err(ebuf, sizeof ebuf, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS )) ipath_dev_err(dd, "Re-enabling masked errors " "(%s)\n", ebuf); else { @@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque) * them. So only complain about these at debug * level. */ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", ebuf); + if (iserr) + ipath_dbg("Re-enabling queue full errors (%s)\n", + ebuf); + else + ipath_cdbg(ERRPKT, "Re-enabling packet" + " problem interrupt (%s)\n", ebuf); } dd->ipath_maskederrs = dd->ipath_ignorederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 325d6634ff53..1c2b03c2ef5e 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, { if (++qp->s_last == qp->s_size) qp->s_last = 0; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_SUCCESS; @@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, send_first: if (qp->r_reuse_sge) { qp->r_reuse_sge = 0; - qp->r_sge = qp->s_rdma_sge; + qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; goto done; } /* Save the WQE so we can reuse it in case of an error. */ - qp->s_rdma_sge = qp->r_sge; + qp->s_rdma_read_sge = qp->r_sge; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto send_last; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 9a3e54664ee4..a518f7c8fa83 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) goto bail; } + if (wr->wr.ud.ah->pd != qp->ibqp.pd) { + ret = -EPERM; + goto bail; + } + /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { ret = -EINVAL; @@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) done: /* Queue the completion status entry. */ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wr->send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wr->wr_id; wc.status = IB_WC_SUCCESS; @@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2aaacdb7e52a..18c6df2052c2 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, struct ipath_mcast *mcast; struct ipath_mcast_qp *p; + if (lnh != IPATH_LRH_GRH) { + dev->n_pkt_drops++; + goto bail; + } mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); if (mcast == NULL) { dev->n_pkt_drops++; @@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, } dev->n_multicast_rcv++; list_for_each_entry_rcu(p, &mcast->qp_list, list) - ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, - tlen, p->qp); + ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); /* * Notify ipath_multicast_detach() if it is waiting for us * to finish. @@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, /* +1 is for the qword padding of pbc */ plen = hdrwords + ((len + 3) >> 2) + 1; if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); ret = -EINVAL; goto bail; } @@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_cqe = ib_ipath_max_cqes; props->max_mr = dev->lk_table.max; props->max_pd = ib_ipath_max_pds; - props->max_qp_rd_atom = 1; - props->max_qp_init_rd_atom = 1; + props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; /* props->max_res_rd_atom */ props->max_srq = ib_ipath_max_srqs; props->max_srq_wr = ib_ipath_max_srq_wrs; props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_HCA; + props->atomic_cap = IB_ATOMIC_GLOB; props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; @@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->dma_device = &dd->pcidev->dev; - dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c0c8d5b24a7d..7c4929f1cb5b 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -40,9 +40,12 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <rdma/ib_pack.h> +#include <rdma/ib_user_verbs.h> #include "ipath_layer.h" +#define IPATH_MAX_RDMA_ATOMIC 4 + #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) @@ -89,7 +92,7 @@ struct ib_reth { } __attribute__ ((packed)); struct ib_atomic_eth { - __be64 vaddr; + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; @@ -108,7 +111,7 @@ struct ipath_other_headers { } rc; struct { __be32 aeth; - __be64 atomic_ack_eth; + __be32 atomic_ack_eth[2]; } at; __be32 imm_data; __be32 aeth; @@ -186,7 +189,7 @@ struct ipath_mmap_info { struct ipath_cq_wc { u32 head; /* index of next entry to fill */ u32 tail; /* index of next ib_poll_cq() entry */ - struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ }; /* @@ -312,6 +315,19 @@ struct ipath_sge_state { }; /* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct ipath_ack_entry { + u8 opcode; + u32 psn; + union { + struct ipath_sge_state rdma_sge; + u64 atomic_data; + }; +}; + +/* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). * Variables prefixed with ack_ are for responder replies. @@ -333,24 +349,24 @@ struct ipath_qp { struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ - /* current RDMA read send data */ - struct ipath_sge_state s_rdma_sge; + struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; + struct ipath_sge_state s_ack_rdma_sge; + struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_flags; + unsigned long s_busy; u32 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ - u32 s_rdma_len; /* total length of s_rdma_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_psn; /* current packet sequence number */ - u32 s_ack_psn; /* PSN for RDMA_READ */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ - u64 r_atomic_data; /* data for last atomic op */ - u32 r_atomic_psn; /* PSN of last atomic op */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -360,12 +376,13 @@ struct ipath_qp { u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ u8 r_state; /* opcode of last packet received */ - u8 r_ack_state; /* opcode of packet to ACK */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ @@ -374,6 +391,10 @@ struct ipath_qp { u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + u8 s_flags; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -390,11 +411,16 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; +/* Bit definition for s_busy. */ +#define IPATH_S_BUSY 0 + /* * Bit definitions for s_flags. */ -#define IPATH_S_BUSY 0 -#define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_S_SIGNAL_REQ_WR 0x01 +#define IPATH_S_FENCE_PENDING 0x02 +#define IPATH_S_RDMAR_PENDING 0x04 +#define IPATH_S_ACK_PENDING 0x08 #define IPATH_PSN_CREDIT 2048 @@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int ipath_destroy_srq(struct ib_srq *ibsrq); -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); - int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, @@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, void ipath_do_ruc_send(unsigned long data); -u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr, - u32 pmtu); - int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 0d9b7d06bbc2..773145e29947 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1013,14 +1013,14 @@ static struct { u64 latest_fw; u32 flags; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0), .flags = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } @@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 0b9d053a599d..48f7c65e9aed 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -175,7 +175,9 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, if (!ret) { ++chunk->npages; - if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) { + if (coherent) + ++chunk->nsg; + else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) { chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 6037dd3f87df..aa6c70a6a36f 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -297,7 +297,8 @@ out: int mthca_write_mtt_size(struct mthca_dev *dev) { - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) /* * Be friendly to WRITE_MTT command * and leave two empty slots for the @@ -310,8 +311,9 @@ int mthca_write_mtt_size(struct mthca_dev *dev) return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff; } -void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, - int start_index, u64 *buffer_list, int list_len) +static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, + struct mthca_mtt *mtt, int start_index, + u64 *buffer_list, int list_len) { u64 __iomem *mtts; int i; @@ -323,8 +325,9 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, mtts + i); } -void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt, - int start_index, u64 *buffer_list, int list_len) +static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, + struct mthca_mtt *mtt, int start_index, + u64 *buffer_list, int list_len) { __be64 *mtts; dma_addr_t dma_handle; @@ -353,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, int size = mthca_write_mtt_size(dev); int chunk; - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); while (list_len > 0) { @@ -833,6 +837,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) key = arbel_key_to_hw_index(fmr->ibmr.lkey); key &= dev->limits.num_mpts - 1; + key = adjust_key(dev, key); fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); fmr->maps = 0; @@ -879,8 +884,8 @@ int mthca_init_mr_table(struct mthca_dev *dev) } mpts = mtts = 1 << i; } else { - mpts = dev->limits.num_mtt_segs; - mtts = dev->limits.num_mpts; + mtts = dev->limits.num_mtt_segs; + mpts = dev->limits.num_mpts; } if (!mthca_is_memfree(dev) && diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 0725ad7ad9bf..47e6fd46d9c2 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; - dev->ib_dev.class_dev.dev = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; dev->ib_dev.modify_device = mthca_modify_device; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 224c93dd29eb..8fe6fee7a97a 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, goto out; } + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + if ((attr_mask & IB_QP_PKEY_INDEX) && attr->pkey_index >= dev->limits.pkey_table_len) { mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", @@ -1083,21 +1088,21 @@ static void mthca_unmap_memfree(struct mthca_dev *dev, static int mthca_alloc_memfree(struct mthca_dev *dev, struct mthca_qp *qp) { - int ret = 0; - if (mthca_is_memfree(dev)) { qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, qp->qpn, &qp->rq.db); if (qp->rq.db_index < 0) - return ret; + return -ENOMEM; qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, qp->qpn, &qp->sq.db); - if (qp->sq.db_index < 0) + if (qp->sq.db_index < 0) { mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + return -ENOMEM; + } } - return ret; + return 0; } static void mthca_free_memfree(struct mthca_dev *dev, @@ -1414,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev, * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, + mthca_cq_clean(dev, recv_cq, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, - qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + mthca_cq_clean(dev, send_cq, qp->qpn, NULL); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp); |