diff options
Diffstat (limited to 'drivers/infiniband')
60 files changed, 2119 insertions, 851 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 163d991eb8c9..50fb1cd447b7 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,9 +1,11 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o +user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ ib_cm.o iw_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o -obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o +obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ + $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o @@ -18,6 +20,8 @@ iw_cm-y := iwcm.o rdma_cm-y := cma.o +rdma_ucm-y := ucma.o + ib_addr-y := addr.o ib_umad-y := user_mad.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index af939796750d..d2bb5a9a303f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -360,8 +360,7 @@ static int netevent_callback(struct notifier_block *self, unsigned long event, if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->dev->type == ARPHRD_INFINIBAND && - (neigh->nud_state & NUD_VALID)) { + if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 79c937bf6962..d446998b12a4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3289,6 +3289,10 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { + /* Allow transition to RTS before sending REP */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 985a6b564d8f..9e0ab048c878 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -70,6 +70,7 @@ static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); +static DEFINE_IDR(udp_ps); struct cma_device { struct list_head list; @@ -133,7 +134,6 @@ struct rdma_id_private { u32 seq_num; u32 qp_num; - enum ib_qp_type qp_type; u8 srq; }; @@ -392,7 +392,6 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, id->qp = qp; id_priv->qp_num = qp->qp_num; - id_priv->qp_type = qp->qp_type; id_priv->srq = (qp->srq != NULL); return 0; err: @@ -510,9 +509,17 @@ static inline int cma_any_addr(struct sockaddr *addr) return cma_zero_addr(addr) || cma_loopback_addr(addr); } +static inline __be16 cma_port(struct sockaddr *addr) +{ + if (addr->sa_family == AF_INET) + return ((struct sockaddr_in *) addr)->sin_port; + else + return ((struct sockaddr_in6 *) addr)->sin6_port; +} + static inline int cma_any_port(struct sockaddr *addr) { - return !((struct sockaddr_in *) addr)->sin_port; + return !cma_port(addr); } static int cma_get_net_info(void *hdr, enum rdma_port_space ps, @@ -594,20 +601,6 @@ static inline int cma_user_data_offset(enum rdma_port_space ps) } } -static int cma_notify_user(struct rdma_id_private *id_priv, - enum rdma_cm_event_type type, int status, - void *data, u8 data_len) -{ - struct rdma_cm_event event; - - event.event = type; - event.status = status; - event.private_data = data; - event.private_data_len = data_len; - - return id_priv->id.event_handler(&id_priv->id, &event); -} - static void cma_cancel_route(struct rdma_id_private *id_priv) { switch (rdma_node_get_transport(id_priv->id.device->node_type)) { @@ -776,63 +769,61 @@ static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) return 0; } -static int cma_rtu_recv(struct rdma_id_private *id_priv) +static void cma_set_rep_event_data(struct rdma_cm_event *event, + struct ib_cm_rep_event_param *rep_data, + void *private_data) { - int ret; - - ret = cma_modify_qp_rts(&id_priv->id); - if (ret) - goto reject; - - return 0; -reject: - cma_modify_qp_err(&id_priv->id); - ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, - NULL, 0, NULL, 0); - return ret; + event->param.conn.private_data = private_data; + event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + event->param.conn.responder_resources = rep_data->responder_resources; + event->param.conn.initiator_depth = rep_data->initiator_depth; + event->param.conn.flow_control = rep_data->flow_control; + event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; + event->param.conn.srq = rep_data->srq; + event->param.conn.qp_num = rep_data->remote_qpn; } static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; - enum rdma_cm_event_type event; - u8 private_data_len = 0; - int ret = 0, status = 0; + struct rdma_cm_event event; + int ret = 0; atomic_inc(&id_priv->dev_remove); if (!cma_comp(id_priv, CMA_CONNECT)) goto out; + memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: - event = RDMA_CM_EVENT_UNREACHABLE; - status = -ETIMEDOUT; + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: - status = cma_verify_rep(id_priv, ib_event->private_data); - if (status) - event = RDMA_CM_EVENT_CONNECT_ERROR; + event.status = cma_verify_rep(id_priv, ib_event->private_data); + if (event.status) + event.event = RDMA_CM_EVENT_CONNECT_ERROR; else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { - status = cma_rep_recv(id_priv); - event = status ? RDMA_CM_EVENT_CONNECT_ERROR : - RDMA_CM_EVENT_ESTABLISHED; + event.status = cma_rep_recv(id_priv); + event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; } else - event = RDMA_CM_EVENT_CONNECT_RESPONSE; - private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; + cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, + ib_event->private_data); break; case IB_CM_RTU_RECEIVED: - status = cma_rtu_recv(id_priv); - event = status ? RDMA_CM_EVENT_CONNECT_ERROR : - RDMA_CM_EVENT_ESTABLISHED; + case IB_CM_USER_ESTABLISHED: + event.event = RDMA_CM_EVENT_ESTABLISHED; break; case IB_CM_DREQ_ERROR: - status = -ETIMEDOUT; /* fall through */ + event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) goto out; - event = RDMA_CM_EVENT_DISCONNECTED; + event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: case IB_CM_MRA_RECEIVED: @@ -840,9 +831,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto out; case IB_CM_REJ_RECEIVED: cma_modify_qp_err(&id_priv->id); - status = ib_event->param.rej_rcvd.reason; - event = RDMA_CM_EVENT_REJECTED; - private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + event.status = ib_event->param.rej_rcvd.reason; + event.event = RDMA_CM_EVENT_REJECTED; + event.param.conn.private_data = ib_event->private_data; + event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", @@ -850,8 +842,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto out; } - ret = cma_notify_user(id_priv, event, status, ib_event->private_data, - private_data_len); + ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; @@ -865,8 +856,8 @@ out: return ret; } -static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event) +static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; @@ -913,9 +904,61 @@ err: return NULL; } +static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + union cma_ip_addr *src, *dst; + __u16 port; + u8 ip_ver; + int ret; + + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + return NULL; + + + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); + + ret = rdma_translate_ip(&id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->state = CMA_CONNECT; + return id_priv; +err: + rdma_destroy_id(id); + return NULL; +} + +static void cma_set_req_event_data(struct rdma_cm_event *event, + struct ib_cm_req_event_param *req_data, + void *private_data, int offset) +{ + event->param.conn.private_data = private_data + offset; + event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; + event->param.conn.responder_resources = req_data->responder_resources; + event->param.conn.initiator_depth = req_data->initiator_depth; + event->param.conn.flow_control = req_data->flow_control; + event->param.conn.retry_count = req_data->retry_count; + event->param.conn.rnr_retry_count = req_data->rnr_retry_count; + event->param.conn.srq = req_data->srq; + event->param.conn.qp_num = req_data->remote_qpn; +} + static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; + struct rdma_cm_event event; int offset, ret; listen_id = cm_id->context; @@ -925,7 +968,19 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto out; } - conn_id = cma_new_id(&listen_id->id, ib_event); + memset(&event, 0, sizeof event); + offset = cma_user_data_offset(listen_id->id.ps); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + if (listen_id->id.ps == RDMA_PS_UDP) { + conn_id = cma_new_udp_id(&listen_id->id, ib_event); + event.param.ud.private_data = ib_event->private_data + offset; + event.param.ud.private_data_len = + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; + } else { + conn_id = cma_new_conn_id(&listen_id->id, ib_event); + cma_set_req_event_data(&event, &ib_event->param.req_rcvd, + ib_event->private_data, offset); + } if (!conn_id) { ret = -ENOMEM; goto out; @@ -942,10 +997,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; - offset = cma_user_data_offset(listen_id->id.ps); - ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, - ib_event->private_data + offset, - IB_CM_REQ_PRIVATE_DATA_SIZE - offset); + ret = conn_id->id.event_handler(&conn_id->id, &event); if (!ret) goto out; @@ -964,8 +1016,7 @@ out: static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) { - return cpu_to_be64(((u64)ps << 16) + - be16_to_cpu(((struct sockaddr_in *) addr)->sin_port)); + return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); } static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, @@ -1021,36 +1072,49 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; - enum rdma_cm_event_type event = 0; + struct rdma_cm_event event; struct sockaddr_in *sin; int ret = 0; + memset(&event, 0, sizeof event); atomic_inc(&id_priv->dev_remove); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: - event = RDMA_CM_EVENT_DISCONNECTED; + event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; *sin = iw_event->remote_addr; - if (iw_event->status) - event = RDMA_CM_EVENT_REJECTED; - else - event = RDMA_CM_EVENT_ESTABLISHED; + switch (iw_event->status) { + case 0: + event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case -ECONNRESET: + case -ECONNREFUSED: + event.event = RDMA_CM_EVENT_REJECTED; + break; + case -ETIMEDOUT: + event.event = RDMA_CM_EVENT_UNREACHABLE; + break; + default: + event.event = RDMA_CM_EVENT_CONNECT_ERROR; + break; + } break; case IW_CM_EVENT_ESTABLISHED: - event = RDMA_CM_EVENT_ESTABLISHED; + event.event = RDMA_CM_EVENT_ESTABLISHED; break; default: BUG_ON(1); } - ret = cma_notify_user(id_priv, event, iw_event->status, - iw_event->private_data, - iw_event->private_data_len); + event.status = iw_event->status; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; @@ -1071,6 +1135,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct rdma_id_private *listen_id, *conn_id; struct sockaddr_in *sin; struct net_device *dev = NULL; + struct rdma_cm_event event; int ret; listen_id = cm_id->context; @@ -1124,9 +1189,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; *sin = iw_event->remote_addr; - ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, - iw_event->private_data, - iw_event->private_data_len); + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; @@ -1515,8 +1582,9 @@ static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; - enum rdma_cm_event_type event; + struct rdma_cm_event event; + memset(&event, 0, sizeof event); atomic_inc(&id_priv->dev_remove); /* @@ -1536,14 +1604,15 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (status) { if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) goto out; - event = RDMA_CM_EVENT_ADDR_ERROR; + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = status; } else { memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); - event = RDMA_CM_EVENT_ADDR_RESOLVED; + event.event = RDMA_CM_EVENT_ADDR_RESOLVED; } - if (cma_notify_user(id_priv, event, status, NULL, 0)) { + if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, CMA_DESTROYING); cma_release_remove(id_priv); cma_deref_id(id_priv); @@ -1733,6 +1802,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_TCP: ps = &tcp_ps; break; + case RDMA_PS_UDP: + ps = &udp_ps; + break; default: return -EPROTONOSUPPORT; } @@ -1821,6 +1893,110 @@ static int cma_format_hdr(void *hdr, enum rdma_port_space ps, return 0; } +static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + struct rdma_cm_event event; + struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + int ret = 0; + + memset(&event, 0, sizeof event); + atomic_inc(&id_priv->dev_remove); + if (!cma_comp(id_priv, CMA_CONNECT)) + goto out; + + switch (ib_event->event) { + case IB_CM_SIDR_REQ_ERROR: + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; + break; + case IB_CM_SIDR_REP_RECEIVED: + event.param.ud.private_data = ib_event->private_data; + event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + if (rep->status != IB_SIDR_SUCCESS) { + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = ib_event->param.sidr_rep_rcvd.status; + break; + } + if (rep->qkey != RDMA_UD_QKEY) { + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -EINVAL; + break; + } + ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, + id_priv->id.route.path_rec, + &event.param.ud.ah_attr); + event.param.ud.qp_num = rep->qpn; + event.param.ud.qkey = rep->qkey; + event.event = RDMA_CM_EVENT_ESTABLISHED; + event.status = 0; + break; + default: + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + ib_event->event); + goto out; + } + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + cma_release_remove(id_priv); + return ret; +} + +static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_sidr_req_param req; + struct rdma_route *route; + int ret; + + req.private_data_len = sizeof(struct cma_hdr) + + conn_param->private_data_len; + req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!req.private_data) + return -ENOMEM; + + if (conn_param->private_data && conn_param->private_data_len) + memcpy((void *) req.private_data + sizeof(struct cma_hdr), + conn_param->private_data, conn_param->private_data_len); + + route = &id_priv->id.route; + ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); + if (ret) + goto out; + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, + cma_sidr_rep_handler, id_priv); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); + goto out; + } + + req.path = route->path_rec; + req.service_id = cma_get_service_id(id_priv->id.ps, + &route->addr.dst_addr); + req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); + req.max_cm_retries = CMA_MAX_CM_RETRIES; + + ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } +out: + kfree(req.private_data); + return ret; +} + static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { @@ -1860,7 +2036,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.service_id = cma_get_service_id(id_priv->id.ps, &route->addr.dst_addr); req.qp_num = id_priv->qp_num; - req.qp_type = id_priv->qp_type; + req.qp_type = IB_QPT_RC; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; @@ -1937,13 +2113,15 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) if (!id->qp) { id_priv->qp_num = conn_param->qp_num; - id_priv->qp_type = conn_param->qp_type; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_connect_ib(id_priv, conn_param); + if (id->ps == RDMA_PS_UDP) + ret = cma_resolve_ib_udp(id_priv, conn_param); + else + ret = cma_connect_ib(id_priv, conn_param); break; case RDMA_TRANSPORT_IWARP: ret = cma_connect_iw(id_priv, conn_param); @@ -1966,11 +2144,25 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; - int ret; + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; - ret = cma_modify_qp_rtr(&id_priv->id); - if (ret) - return ret; + if (id_priv->id.qp) { + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto out; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr, + &qp_attr_mask); + if (ret) + goto out; + + qp_attr.max_rd_atomic = conn_param->initiator_depth; + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); + if (ret) + goto out; + } memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; @@ -1985,7 +2177,9 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.rnr_retry_count = conn_param->rnr_retry_count; rep.srq = id_priv->srq ? 1 : 0; - return ib_send_cm_rep(id_priv->cm_id.ib, &rep); + ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); +out: + return ret; } static int cma_accept_iw(struct rdma_id_private *id_priv, @@ -2010,6 +2204,24 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, return iw_cm_accept(id_priv->cm_id.iw, &iw_param); } +static int cma_send_sidr_rep(struct rdma_id_private *id_priv, + enum ib_cm_sidr_status status, + const void *private_data, int private_data_len) +{ + struct ib_cm_sidr_rep_param rep; + + memset(&rep, 0, sizeof rep); + rep.status = status; + if (status == IB_SIDR_SUCCESS) { + rep.qp_num = id_priv->qp_num; + rep.qkey = RDMA_UD_QKEY; + } + rep.private_data = private_data; + rep.private_data_len = private_data_len; + + return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); +} + int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -2021,13 +2233,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) if (!id->qp && conn_param) { id_priv->qp_num = conn_param->qp_num; - id_priv->qp_type = conn_param->qp_type; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (conn_param) + if (id->ps == RDMA_PS_UDP) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->private_data, + conn_param->private_data_len); + else if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); @@ -2051,6 +2266,27 @@ reject: } EXPORT_SYMBOL(rdma_accept); +int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return -EINVAL; + + switch (id->device->node_type) { + case RDMA_NODE_IB_CA: + ret = ib_cm_notify(id_priv->cm_id.ib, event); + break; + default: + ret = 0; + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_notify); + int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len) { @@ -2063,9 +2299,13 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = ib_send_cm_rej(id_priv->cm_id.ib, - IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, - private_data, private_data_len); + if (id->ps == RDMA_PS_UDP) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, + private_data, private_data_len); + else + ret = ib_send_cm_rej(id_priv->cm_id.ib, + IB_CM_REJ_CONSUMER_DEFINED, NULL, + 0, private_data, private_data_len); break; case RDMA_TRANSPORT_IWARP: ret = iw_cm_reject(id_priv->cm_id.iw, @@ -2136,6 +2376,7 @@ static void cma_add_one(struct ib_device *device) static int cma_remove_id_dev(struct rdma_id_private *id_priv) { + struct rdma_cm_event event; enum cma_state state; /* Record that we want to remove the device */ @@ -2150,8 +2391,9 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) return 0; - return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL, - 0, NULL, 0); + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; + return id_priv->id.event_handler(&id_priv->id, &event); } static void cma_process_remove(struct cma_device *cma_dev) @@ -2233,6 +2475,7 @@ static void cma_cleanup(void) destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); + idr_destroy(&udp_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 86a3b2d401db..8926a2bd4a87 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -394,20 +394,12 @@ EXPORT_SYMBOL(ib_destroy_fmr_pool); */ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) { - int serial; - - atomic_inc(&pool->req_ser); - /* - * It's OK if someone else bumps req_ser again here -- we'll - * just wait a little longer. - */ - serial = atomic_read(&pool->req_ser); + int serial = atomic_inc_return(&pool->req_ser); wake_up_process(pool->thread); if (wait_event_interruptible(pool->force_wait, - atomic_read(&pool->flush_ser) - - atomic_read(&pool->req_ser) >= 0)) + atomic_read(&pool->flush_ser) - serial >= 0)) return -EINTR; return 0; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 15f38d94b3a8..13efd4170349 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -642,7 +642,8 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&qp_info->snoop_lock, flags); } -static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, +static void build_smp_wc(struct ib_qp *qp, + u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); @@ -652,7 +653,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, wc->pkey_index = pkey_index; wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); wc->src_qp = IB_QP0; - wc->qp_num = IB_QP0; + wc->qp = qp; wc->slid = slid; wc->sl = 0; wc->dlid_path_bits = 0; @@ -713,7 +714,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid), + build_smp_wc(mad_agent_priv->agent.qp, + send_wr->wr_id, be16_to_cpu(smp->dr_slid), send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); @@ -998,17 +1000,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; - sge[0].addr = dma_map_single(mad_agent->device->dma_device, - mad_send_wr->send_buf.mad, - sge[0].length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr); - - sge[1].addr = dma_map_single(mad_agent->device->dma_device, - ib_get_payload(mad_send_wr), - sge[1].length, - DMA_TO_DEVICE); - pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr); + sge[0].addr = ib_dma_map_single(mad_agent->device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + mad_send_wr->header_mapping = sge[0].addr; + + sge[1].addr = ib_dma_map_single(mad_agent->device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { @@ -1026,12 +1028,12 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); if (ret) { - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, header_mapping), - sge[0].length, DMA_TO_DEVICE); - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, payload_mapping), - sge[1].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->payload_mapping, + sge[1].length, DMA_TO_DEVICE); } return ret; } @@ -1850,11 +1852,11 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); - dma_unmap_single(port_priv->device->dma_device, - pci_unmap_addr(&recv->header, mapping), - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), - DMA_FROM_DEVICE); + ib_dma_unmap_single(port_priv->device, + recv->header.mapping, + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; @@ -2080,12 +2082,12 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: - dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, header_mapping), - mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); - dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, - pci_unmap_addr(mad_send_wr, payload_mapping), - mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->header_mapping, + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->payload_mapping, + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -2355,7 +2357,8 @@ static void local_completions(struct work_struct *work) * Defined behavior is to complete response * before request */ - build_smp_wc((unsigned long) local->mad_send_wr, + build_smp_wc(recv_mad_agent->agent.qp, + (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), 0, recv_mad_agent->agent.port_num, &wc); @@ -2528,13 +2531,12 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, break; } } - sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, - &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); - pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); + sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, + &mad_priv->grh, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); + mad_priv->header.mapping = sg_list.addr; recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; @@ -2549,12 +2551,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); - dma_unmap_single(qp_info->port_priv->device->dma_device, - pci_unmap_addr(&mad_priv->header, - mapping), - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); kmem_cache_free(ib_mad_cache, mad_priv); printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); break; @@ -2586,11 +2587,11 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) /* Remove from posted receive MAD list */ list_del(&mad_list->list); - dma_unmap_single(qp_info->port_priv->device->dma_device, - pci_unmap_addr(&recv->header, mapping), - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), - DMA_FROM_DEVICE); + ib_dma_unmap_single(qp_info->port_priv->device, + recv->header.mapping, + sizeof(struct ib_mad_private) - + sizeof(struct ib_mad_private_header), + DMA_FROM_DEVICE); kmem_cache_free(ib_mad_cache, recv); } diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d5548e73e068..de89717f49fe 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -73,7 +73,7 @@ struct ib_mad_private_header { struct ib_mad_list_head mad_list; struct ib_mad_recv_wc recv_wc; struct ib_wc wc; - DECLARE_PCI_UNMAP_ADDR(mapping) + u64 mapping; } __attribute__ ((packed)); struct ib_mad_private { @@ -126,8 +126,8 @@ struct ib_mad_send_wr_private { struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf send_buf; - DECLARE_PCI_UNMAP_ADDR(header_mapping) - DECLARE_PCI_UNMAP_ADDR(payload_mapping) + u64 header_mapping; + u64 payload_mapping; struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c new file mode 100644 index 000000000000..e2e8d329b443 --- /dev/null +++ b/drivers/infiniband/core/ucma.c @@ -0,0 +1,885 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/completion.h> +#include <linux/mutex.h> +#include <linux/poll.h> +#include <linux/idr.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/miscdevice.h> + +#include <rdma/rdma_user_cm.h> +#include <rdma/ib_marshall.h> +#include <rdma/rdma_cm.h> + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + UCMA_MAX_BACKLOG = 128 +}; + +struct ucma_file { + struct mutex mut; + struct file *filp; + struct list_head ctx_list; + struct list_head event_list; + wait_queue_head_t poll_wait; +}; + +struct ucma_context { + int id; + struct completion comp; + atomic_t ref; + int events_reported; + int backlog; + + struct ucma_file *file; + struct rdma_cm_id *cm_id; + u64 uid; + + struct list_head list; +}; + +struct ucma_event { + struct ucma_context *ctx; + struct list_head list; + struct rdma_cm_id *cm_id; + struct rdma_ucm_event_resp resp; +}; + +static DEFINE_MUTEX(mut); +static DEFINE_IDR(ctx_idr); + +static inline struct ucma_context *_ucma_find_context(int id, + struct ucma_file *file) +{ + struct ucma_context *ctx; + + ctx = idr_find(&ctx_idr, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + return ctx; +} + +static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) +{ + struct ucma_context *ctx; + + mutex_lock(&mut); + ctx = _ucma_find_context(id, file); + if (!IS_ERR(ctx)) + atomic_inc(&ctx->ref); + mutex_unlock(&mut); + return ctx; +} + +static void ucma_put_ctx(struct ucma_context *ctx) +{ + if (atomic_dec_and_test(&ctx->ref)) + complete(&ctx->comp); +} + +static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) +{ + struct ucma_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + atomic_set(&ctx->ref, 1); + init_completion(&ctx->comp); + ctx->file = file; + + do { + ret = idr_pre_get(&ctx_idr, GFP_KERNEL); + if (!ret) + goto error; + + mutex_lock(&mut); + ret = idr_get_new(&ctx_idr, ctx, &ctx->id); + mutex_unlock(&mut); + } while (ret == -EAGAIN); + + if (ret) + goto error; + + list_add_tail(&ctx->list, &file->ctx_list); + return ctx; + +error: + kfree(ctx); + return NULL; +} + +static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, + struct rdma_conn_param *src) +{ + if (src->private_data_len) + memcpy(dst->private_data, src->private_data, + src->private_data_len); + dst->private_data_len = src->private_data_len; + dst->responder_resources =src->responder_resources; + dst->initiator_depth = src->initiator_depth; + dst->flow_control = src->flow_control; + dst->retry_count = src->retry_count; + dst->rnr_retry_count = src->rnr_retry_count; + dst->srq = src->srq; + dst->qp_num = src->qp_num; +} + +static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, + struct rdma_ud_param *src) +{ + if (src->private_data_len) + memcpy(dst->private_data, src->private_data, + src->private_data_len); + dst->private_data_len = src->private_data_len; + ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + dst->qp_num = src->qp_num; + dst->qkey = src->qkey; +} + +static void ucma_set_event_context(struct ucma_context *ctx, + struct rdma_cm_event *event, + struct ucma_event *uevent) +{ + uevent->ctx = ctx; + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; +} + +static int ucma_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct ucma_event *uevent; + struct ucma_context *ctx = cm_id->context; + int ret = 0; + + uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) + return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + + uevent->cm_id = cm_id; + ucma_set_event_context(ctx, event, uevent); + uevent->resp.event = event->event; + uevent->resp.status = event->status; + if (cm_id->ps == RDMA_PS_UDP) + ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); + else + ucma_copy_conn_event(&uevent->resp.param.conn, + &event->param.conn); + + mutex_lock(&ctx->file->mut); + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { + if (!ctx->backlog) { + ret = -EDQUOT; + kfree(uevent); + goto out; + } + ctx->backlog--; + } else if (!ctx->uid) { + /* + * We ignore events for new connections until userspace has set + * their context. This can only happen if an error occurs on a + * new connection before the user accepts it. This is okay, + * since the accept will just fail later. + */ + kfree(uevent); + goto out; + } + + list_add_tail(&uevent->list, &ctx->file->event_list); + wake_up_interruptible(&ctx->file->poll_wait); +out: + mutex_unlock(&ctx->file->mut); + return ret; +} + +static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct ucma_context *ctx; + struct rdma_ucm_get_event cmd; + struct ucma_event *uevent; + int ret = 0; + DEFINE_WAIT(wait); + + if (out_len < sizeof uevent->resp) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->mut); + while (list_empty(&file->event_list)) { + if (file->filp->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); + mutex_unlock(&file->mut); + schedule(); + mutex_lock(&file->mut); + finish_wait(&file->poll_wait, &wait); + } + + if (ret) + goto done; + + uevent = list_entry(file->event_list.next, struct ucma_event, list); + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + ctx = ucma_alloc_ctx(file); + if (!ctx) { + ret = -ENOMEM; + goto done; + } + uevent->ctx->backlog++; + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + uevent->resp.id = ctx->id; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof uevent->resp)) { + ret = -EFAULT; + goto done; + } + + list_del(&uevent->list); + uevent->ctx->events_reported++; + kfree(uevent); +done: + mutex_unlock(&file->mut); + return ret; +} + +static ssize_t ucma_create_id(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_create_id cmd; + struct rdma_ucm_create_id_resp resp; + struct ucma_context *ctx; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->mut); + ctx = ucma_alloc_ctx(file); + mutex_unlock(&file->mut); + if (!ctx) + return -ENOMEM; + + ctx->uid = cmd.uid; + ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps); + if (IS_ERR(ctx->cm_id)) { + ret = PTR_ERR(ctx->cm_id); + goto err1; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err2; + } + return 0; + +err2: + rdma_destroy_id(ctx->cm_id); +err1: + mutex_lock(&mut); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + kfree(ctx); + return ret; +} + +static void ucma_cleanup_events(struct ucma_context *ctx) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { + if (uevent->ctx != ctx) + continue; + + list_del(&uevent->list); + + /* clear incoming connections. */ + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + + kfree(uevent); + } +} + +static int ucma_free_ctx(struct ucma_context *ctx) +{ + int events_reported; + + /* No new events will be generated after destroying the id. */ + rdma_destroy_id(ctx->cm_id); + + /* Cleanup events not yet reported to the user. */ + mutex_lock(&ctx->file->mut); + ucma_cleanup_events(ctx); + list_del(&ctx->list); + mutex_unlock(&ctx->file->mut); + + events_reported = ctx->events_reported; + kfree(ctx); + return events_reported; +} + +static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_destroy_id cmd; + struct rdma_ucm_destroy_id_resp resp; + struct ucma_context *ctx; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&mut); + ctx = _ucma_find_context(cmd.id, file); + if (!IS_ERR(ctx)) + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + resp.events_reported = ucma_free_ctx(ctx); + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind_addr cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_addr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_addr cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, + cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_route(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_route cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + ib_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + ib_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + +static ssize_t ucma_query_route(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_query_route cmd; + struct rdma_ucm_query_route_resp resp; + struct ucma_context *ctx; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + memset(&resp, 0, sizeof resp); + addr = &ctx->cm_id->route.addr.src_addr; + memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + addr = &ctx->cm_id->route.addr.dst_addr; + memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + if (!ctx->cm_id->device) + goto out; + + resp.node_guid = ctx->cm_id->device->node_guid; + resp.port_num = ctx->cm_id->port_num; + switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + break; + default: + break; + } + +out: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_conn_param(struct rdma_conn_param *dst, + struct rdma_ucm_conn_param *src) +{ + dst->private_data = src->private_data; + dst->private_data_len = src->private_data_len; + dst->responder_resources =src->responder_resources; + dst->initiator_depth = src->initiator_depth; + dst->flow_control = src->flow_control; + dst->retry_count = src->retry_count; + dst->rnr_retry_count = src->rnr_retry_count; + dst->srq = src->srq; + dst->qp_num = src->qp_num; +} + +static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_connect cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + if (!cmd.conn_param.valid) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ret = rdma_connect(ctx->cm_id, &conn_param); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_listen cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ? + cmd.backlog : UCMA_MAX_BACKLOG; + ret = rdma_listen(ctx->cm_id, ctx->backlog); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_accept cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (cmd.conn_param.valid) { + ctx->uid = cmd.uid; + ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ret = rdma_accept(ctx->cm_id, &conn_param); + } else + ret = rdma_accept(ctx->cm_id, NULL); + + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_reject cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_disconnect cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_disconnect(ctx->cm_id); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_init_qp_attr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_init_qp_attr cmd; + struct ib_uverbs_qp_attr resp; + struct ucma_context *ctx; + struct ib_qp_attr qp_attr; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.qp_attr_mask = 0; + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.qp_state = cmd.qp_state; + ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + if (ret) + goto out; + + ib_copy_qp_attr_to_user(&resp, &qp_attr); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + +out: + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_notify cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, + [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, + [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, + [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, + [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, + [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, + [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, + [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, + [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, + [RDMA_USER_CM_CMD_REJECT] = ucma_reject, + [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, + [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, + [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, + [RDMA_USER_CM_CMD_GET_OPTION] = NULL, + [RDMA_USER_CM_CMD_SET_OPTION] = NULL, + [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, +}; + +static ssize_t ucma_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct ucma_file *file = filp->private_data; + struct rdma_ucm_cmd_hdr hdr; + ssize_t ret; + + if (len < sizeof(hdr)) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + + if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + return -EINVAL; + + if (hdr.in + sizeof(hdr) > len) + return -EINVAL; + + if (!ucma_cmd_table[hdr.cmd]) + return -ENOSYS; + + ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); + if (!ret) + ret = len; + + return ret; +} + +static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct ucma_file *file = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &file->poll_wait, wait); + + if (!list_empty(&file->event_list)) + mask = POLLIN | POLLRDNORM; + + return mask; +} + +static int ucma_open(struct inode *inode, struct file *filp) +{ + struct ucma_file *file; + + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) + return -ENOMEM; + + INIT_LIST_HEAD(&file->event_list); + INIT_LIST_HEAD(&file->ctx_list); + init_waitqueue_head(&file->poll_wait); + mutex_init(&file->mut); + + filp->private_data = file; + file->filp = filp; + return 0; +} + +static int ucma_close(struct inode *inode, struct file *filp) +{ + struct ucma_file *file = filp->private_data; + struct ucma_context *ctx, *tmp; + + mutex_lock(&file->mut); + list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { + mutex_unlock(&file->mut); + + mutex_lock(&mut); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + + ucma_free_ctx(ctx); + mutex_lock(&file->mut); + } + mutex_unlock(&file->mut); + kfree(file); + return 0; +} + +static struct file_operations ucma_fops = { + .owner = THIS_MODULE, + .open = ucma_open, + .release = ucma_close, + .write = ucma_write, + .poll = ucma_poll, +}; + +static struct miscdevice ucma_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "rdma_cm", + .fops = &ucma_fops, +}; + +static ssize_t show_abi_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); +} +static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ucma_init(void) +{ + int ret; + + ret = misc_register(&ucma_misc); + if (ret) + return ret; + + ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); + if (ret) { + printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); + goto err; + } + return 0; +err: + misc_deregister(&ucma_misc); + return ret; +} + +static void __exit ucma_cleanup(void) +{ + device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); + misc_deregister(&ucma_misc); + idr_destroy(&ctx_idr); +} + +module_init(ucma_init); +module_exit(ucma_cleanup); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 743247ec065e..df1efbc10882 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -933,7 +933,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, resp->wc[i].vendor_err = wc[i].vendor_err; resp->wc[i].byte_len = wc[i].byte_len; resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; - resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].qp_num = wc[i].qp->qp_num; resp->wc[i].src_qp = wc[i].src_qp; resp->wc[i].wc_flags = wc[i].wc_flags; resp->wc[i].pkey_index = wc[i].pkey_index; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4e16314e8e6d..a617ca7b6923 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -534,9 +534,9 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, * module reference. */ filp->f_op = fops_get(&uverbs_event_fops); - filp->f_vfsmnt = mntget(uverbs_event_mnt); - filp->f_dentry = dget(uverbs_event_mnt->mnt_root); - filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_path.mnt = mntget(uverbs_event_mnt); + filp->f_path.dentry = dget(uverbs_event_mnt->mnt_root); + filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; filp->private_data = ev_file; diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index ce46b13ae02b..5440da0e59b4 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -32,8 +32,8 @@ #include <rdma/ib_marshall.h> -static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct ib_ah_attr *src) +void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, + struct ib_ah_attr *src) { memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); dst->grh.flow_label = src->grh.flow_label; @@ -47,6 +47,7 @@ static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; dst->port_num = src->port_num; } +EXPORT_SYMBOL(ib_copy_ah_attr_to_user); void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c index db12cc0841df..c95fe952abd5 100644 --- a/drivers/infiniband/core/uverbs_mem.c +++ b/drivers/infiniband/core/uverbs_mem.c @@ -52,8 +52,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d int i; list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { - dma_unmap_sg(dev->dma_device, chunk->page_list, - chunk->nents, DMA_BIDIRECTIONAL); + ib_dma_unmap_sg(dev, chunk->page_list, + chunk->nents, DMA_BIDIRECTIONAL); for (i = 0; i < chunk->nents; ++i) { if (umem->writable && dirty) set_page_dirty_lock(chunk->page_list[i].page); @@ -136,10 +136,10 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, chunk->page_list[i].length = PAGE_SIZE; } - chunk->nmap = dma_map_sg(dev->dma_device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL); + chunk->nmap = ib_dma_map_sg(dev, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL); if (chunk->nmap <= 0) { for (i = 0; i < chunk->nents; ++i) put_page(chunk->page_list[i].page); diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 05c9154d46f4..5175c99ee586 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -153,7 +153,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev, entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce)); entry->wr_id = ce->hdr.context; - entry->qp_num = ce->handle; + entry->qp = &qp->ibqp; entry->wc_flags = 0; entry->slid = 0; entry->sl = 0; diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 179d005ed4a5..420c1380f5c3 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -161,8 +161,10 @@ int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, if (attr_mask & IB_QP_STATE) { /* Ensure the state is valid */ - if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) - return -EINVAL; + if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) { + err = -EINVAL; + goto bail0; + } wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state)); @@ -184,9 +186,10 @@ int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, if (attr->cur_qp_state != IB_QPS_RTR && attr->cur_qp_state != IB_QPS_RTS && attr->cur_qp_state != IB_QPS_SQD && - attr->cur_qp_state != IB_QPS_SQE) - return -EINVAL; - else + attr->cur_qp_state != IB_QPS_SQE) { + err = -EINVAL; + goto bail0; + } else wr.next_qp_state = cpu_to_be32(to_c2_state(attr->cur_qp_state)); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1c722032319c..cf95ee474b0f 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -119,13 +119,14 @@ struct ehca_qp { struct ipz_qp_handle ipz_qp_handle; struct ehca_pfqp pf; struct ib_qp_init_attr init_attr; - u64 uspace_squeue; - u64 uspace_rqueue; - u64 uspace_fwh; struct ehca_cq *send_cq; struct ehca_cq *recv_cq; unsigned int sqerr_purgeflag; struct hlist_node list_entries; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; }; /* must be power of 2 */ @@ -142,13 +143,14 @@ struct ehca_cq { struct ipz_cq_handle ipz_cq_handle; struct ehca_pfcq pf; spinlock_t cb_lock; - u64 uspace_queue; - u64 uspace_fwh; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; u32 nr_callbacks; spinlock_t task_lock; u32 ownpid; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; }; enum ehca_mr_flag { @@ -248,20 +250,6 @@ struct ehca_ucontext { struct ib_ucontext ib_ucontext; }; -struct ehca_module *ehca_module_new(void); - -int ehca_module_delete(struct ehca_module *me); - -int ehca_eq_ctor(struct ehca_eq *eq); - -int ehca_eq_dtor(struct ehca_eq *eq); - -struct ehca_shca *ehca_shca_new(void); - -int ehca_shca_delete(struct ehca_shca *me); - -struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); - int ehca_init_pd_cache(void); void ehca_cleanup_pd_cache(void); int ehca_init_cq_cache(void); @@ -283,7 +271,6 @@ extern int ehca_port_act_time; extern int ehca_use_hp_mr; struct ipzu_queue_resp { - u64 queue; /* points to first queue entry */ u32 qe_size; /* queue entry size */ u32 act_nr_of_sg; u32 queue_length; /* queue length allocated in bytes */ @@ -296,7 +283,6 @@ struct ehca_create_cq_resp { u32 cq_number; u32 token; struct ipzu_queue_resp ipz_queue; - struct h_galpas galpas; }; struct ehca_create_qp_resp { @@ -309,7 +295,6 @@ struct ehca_create_qp_resp { u32 dummy; /* padding for 8 byte alignment */ struct ipzu_queue_resp ipz_squeue; struct ipzu_queue_resp ipz_rqueue; - struct h_galpas galpas; }; struct ehca_alloc_cq_parms { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 93995b658d94..9291a86ca053 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; - struct vm_area_struct *vma; memset(&resp, 0, sizeof(resp)); resp.cq_number = my_cq->cq_number; resp.token = my_cq->token; @@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, resp.ipz_queue.queue_length = ipz_queue->queue_length; resp.ipz_queue.pagesize = ipz_queue->pagesize; resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, - ipz_queue->queue_length, - (void**)&resp.ipz_queue.queue, - &vma); - if (ret) { - ehca_err(device, "Could not mmap queue pages"); - cq = ERR_PTR(ret); - goto create_cq_exit4; - } - my_cq->uspace_queue = resp.ipz_queue.queue; - resp.galpas = my_cq->galpas; - ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(device, "Could not mmap fw_handle"); - cq = ERR_PTR(ret); - goto create_cq_exit5; - } - my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); - goto create_cq_exit6; + goto create_cq_exit4; } } return cq; -create_cq_exit6: - ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - -create_cq_exit5: - ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); - create_cq_exit4: ipz_queue_dtor(&my_cq->ipz_queue); @@ -333,7 +306,6 @@ create_cq_exit1: int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; - int ret; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int cq_num = my_cq->cq_number; struct ib_device *device = cq->device; @@ -343,32 +315,30 @@ int ehca_destroy_cq(struct ib_cq *cq) u32 cur_pid = current->tgid; unsigned long flags; + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + if (my_cq->ownpid != cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x " + "cq_num=%x", + cur_pid, my_cq->ownpid, my_cq->cq_number); + return -EINVAL; + } + } + spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_callbacks) + while (my_cq->nr_callbacks) { + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); yield(); + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + } idr_remove(&ehca_cq_idr, my_cq->token); spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { - ehca_err(device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_cq->ownpid); - return -EINVAL; - } - - /* un-mmap if vma alloc */ - if (my_cq->uspace_queue ) { - ret = ehca_munmap(my_cq->uspace_queue, - my_cq->ipz_queue.queue_length); - if (ret) - ehca_err(device, "Could not munmap queue ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(device, "Could not munmap fwh ehca_cq=%p " - "cq_num=%x", my_cq, cq_num); - } - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ @@ -397,7 +367,7 @@ int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); u32 cur_pid = current->tgid; - if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + if (cq->uobject && my_cq->ownpid != cur_pid) { ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", cur_pid, my_cq->ownpid); return -EINVAL; diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index e1b618c5f685..b7be950ab47c 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) ib_device); struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ibdev, ib_device); struct hipz_query_port *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -179,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -212,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -EINVAL; } - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index c3ea746e9045..c069be8cbcb2 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -138,7 +138,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, u64 *rblock; unsigned long block_count; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); if (!rblock) { ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); ret = -ENOMEM; @@ -440,7 +440,8 @@ void ehca_tasklet_eq(unsigned long data) cq = idr_find(&ehca_cq_idr, token); if (cq == NULL) { - spin_unlock(&ehca_cq_idr_lock); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); break; } diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 3720e3032cce..95fd59fb4528 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -171,19 +171,11 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ehca_poll_eqs(unsigned long data); -int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, - struct vm_area_struct **vma); - -int ehca_mmap_register(u64 physical,void **mapped, - struct vm_area_struct **vma); - -int ehca_munmap(unsigned long addr, size_t len); - #ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(void); +void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index cc47e4c13a18..1155bcf48212 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0019"); +MODULE_VERSION("SVNEHCA_0020"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -106,9 +106,9 @@ static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES static struct kmem_cache *ctblk_cache = NULL; -void *ehca_alloc_fw_ctrlblock(void) +void *ehca_alloc_fw_ctrlblock(gfp_t flags) { - void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); + void *ret = kmem_cache_zalloc(ctblk_cache, flags); if (!ret) ehca_gen_err("Out of memory for ctblk"); return ret; @@ -206,7 +206,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) u64 h_ret; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_gen_err("Cannot allocate rblock memory."); return -ENOMEM; @@ -258,7 +258,7 @@ static int init_node_guid(struct ehca_shca *shca) int ret = 0; struct hipz_query_hca *rblock; - rblock = ehca_alloc_fw_ctrlblock(); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); return -ENOMEM; @@ -288,7 +288,7 @@ int ehca_init_device(struct ehca_shca *shca) strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_abi_ver = 6; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -469,7 +469,7 @@ static ssize_t ehca_show_##name(struct device *dev, \ \ shca = dev->driver_data; \ \ - rblock = ehca_alloc_fw_ctrlblock(); \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ dev_err(dev, "Can't allocate rblock memory."); \ return 0; \ @@ -790,7 +790,7 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0019)\n"); + "(Rel.: SVNEHCA_0020)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 0a5e2214cc5f..cfb362a1029c 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, u32 i; u64 *kpage; - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; @@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); - kpage = ehca_alloc_fw_ctrlblock(); + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!kpage) { ehca_err(&shca->ib_device, "kpage alloc failed"); ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index c6c9cef203e3..95efef921f1d 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; - struct vm_area_struct * vma; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; @@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, - ipz_rqueue->queue_length, - (void**)&resp.ipz_rqueue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap rqueue pages"); - goto create_qp_exit3; - } - my_qp->uspace_rqueue = resp.ipz_rqueue.queue; /* squeue properties */ resp.ipz_squeue.qe_size = ipz_squeue->qe_size; resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; resp.ipz_squeue.queue_length = ipz_squeue->queue_length; resp.ipz_squeue.pagesize = ipz_squeue->pagesize; resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; - ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, - ipz_squeue->queue_length, - (void**)&resp.ipz_squeue.queue, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap squeue pages"); - goto create_qp_exit4; - } - my_qp->uspace_squeue = resp.ipz_squeue.queue; - /* fw_handle */ - resp.galpas = my_qp->galpas; - ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, - (void**)&resp.galpas.kernel.fw_handle, - &vma); - if (ret) { - ehca_err(pd->device, "Could not mmap fw_handle"); - goto create_qp_exit5; - } - my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit3; } } return &my_qp->ib_qp; -create_qp_exit6: - ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - -create_qp_exit5: - ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); - -create_qp_exit4: - ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); - create_qp_exit3: ipz_queue_dtor(&my_qp->ipz_rqueue); ipz_queue_dtor(&my_qp->ipz_squeue); @@ -807,7 +768,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, unsigned long spl_flags = 0; /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(); + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!mqpcb) { ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); @@ -931,7 +892,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, my_qp->qp_type == IB_QPT_SMI) && statetrans == IB_QPST_SQE2RTS) { /* mark next free wqe if kernel */ - if (my_qp->uspace_squeue == 0) { + if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); @@ -1273,7 +1234,7 @@ int ehca_query_qp(struct ib_qp *qp, return -EINVAL; } - qpcb = ehca_alloc_fw_ctrlblock(); + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { ehca_err(qp->device,"Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); @@ -1417,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp) enum ib_qp_type qp_type; unsigned long flags; - if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && - my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", - cur_pid, my_pd->ownpid); - return -EINVAL; + if (ibqp->uobject) { + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(ibqp->device, "Resources still referenced in " + "user space qp_num=%x", ibqp->qp_num); + return -EINVAL; + } + if (my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } } if (my_qp->send_cq) { @@ -1439,24 +1407,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp) idr_remove(&ehca_qp_idr, my_qp->token); spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); - /* un-mmap if vma alloc */ - if (my_qp->uspace_rqueue) { - ret = ehca_munmap(my_qp->uspace_rqueue, - my_qp->ipz_rqueue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap rqueue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_squeue, - my_qp->ipz_squeue.queue_length); - if (ret) - ehca_err(ibqp->device, "Could not munmap squeue " - "qp_num=%x", qp_num); - ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); - if (ret) - ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", - qp_num); - } - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index b46bda1bf85d..08d3f892d9f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -579,7 +579,7 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - wc->qp_num = cqe->local_qp_number; + wc->qp = NULL; wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index e08764e4aef2..73db920b6945 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -68,105 +68,183 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context) return 0; } -struct page *ehca_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static void ehca_mm_open(struct vm_area_struct *vma) { - struct page *mypage = NULL; - u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; - u32 idr_handle = fileoffset >> 32; - u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ - u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ - u32 cur_pid = current->tgid; - unsigned long flags; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ehca_pd *pd; - u64 offset; - void *vaddr; + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - switch (q_type) { - case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32*)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} - /* make sure this mmap really belongs to the authorized user */ - if (!cq) { - ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; +static struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); + /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT, + vsize, vma->vm_page_prot); + if (unlikely(ret)) { + ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_RESERVED; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%x", ret); + return ret; } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; - if (cq->ownpid != cur_pid) { + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { ehca_err(cq->ib_cq.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, cq->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_fw() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } + break; - if (rsrc_type == 2) { - ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); - ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; - case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); - qp = idr_find(&ehca_qp_idr, idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } - /* make sure this mmap really belongs to the authorized user */ - if (!qp) { - ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; } + break; - pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); - if (pd->ownpid != cur_pid) { + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + if (unlikely(ret)) { ehca_err(qp->ib_qp.device, - "Invalid caller pid=%x ownpid=%x", - cur_pid, pd->ownpid); - return NOPAGE_SIGBUS; + "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } + break; - if (rsrc_type == 2) { /* rqueue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); - } else if (rsrc_type == 3) { /* squeue */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); - offset = address - vma->vm_start; - vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); - ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", - offset, vaddr); - mypage = virt_to_page(vaddr); + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", + qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; default: - ehca_gen_err("bad queue type %x", q_type); - return NOPAGE_SIGBUS; - } - - if (!mypage) { - ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); - return NOPAGE_SIGBUS; + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; } - get_page(mypage); - return mypage; + return 0; } -static struct vm_operations_struct ehcau_vm_ops = { - .nopage = ehca_nopage, -}; - int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; @@ -175,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - u64 vsize, physical; unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; @@ -201,44 +278,12 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(cq->ib_cq.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = cq->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(cq->ib_cq.device, - "vsize=%lx physical=%lx", vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(cq->ib_cq.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%x cq_num=%x", + ret, cq->cq_number); + return ret; } break; @@ -262,50 +307,12 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) return -EINVAL; - switch (rsrc_type) { - case 1: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); - vma->vm_flags |= VM_RESERVED; - vsize = vma->vm_end - vma->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_err(qp->ib_qp.device, "invalid vsize=%lx", - vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = qp->galpas.user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_IO | VM_RESERVED; - - ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", - vsize, physical); - ret = remap_pfn_range(vma, vma->vm_start, - physical >> PAGE_SHIFT, vsize, - vma->vm_page_prot); - if (ret) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%x", - ret); - return -ENOMEM; - } - break; - - case 2: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - case 3: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &ehcau_vm_ops; - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type %x", - rsrc_type); - return -EINVAL; + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%x qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; } break; @@ -316,77 +323,3 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return 0; } - -int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, - struct vm_area_struct **vma) -{ - down_write(¤t->mm->mmap_sem); - *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - foffset); - up_write(¤t->mm->mmap_sem); - if (!(*mapped)) { - ehca_gen_err("couldn't mmap foffset=%lx length=%lx", - foffset, length); - return -EINVAL; - } - - *vma = find_vma(current->mm, (u64)*mapped); - if (!(*vma)) { - down_write(¤t->mm->mmap_sem); - do_munmap(current->mm, 0, length); - up_write(¤t->mm->mmap_sem); - ehca_gen_err("couldn't find vma queue=%p", *mapped); - return -EINVAL; - } - (*vma)->vm_flags |= VM_RESERVED; - (*vma)->vm_ops = &ehcau_vm_ops; - - return 0; -} - -int ehca_mmap_register(u64 physical, void **mapped, - struct vm_area_struct **vma) -{ - int ret; - unsigned long vsize; - /* ehca hw supports only 4k page */ - ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); - if (ret) { - ehca_gen_err("could'nt mmap physical=%lx", physical); - return ret; - } - - (*vma)->vm_flags |= VM_RESERVED; - vsize = (*vma)->vm_end - (*vma)->vm_start; - if (vsize != EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", - (*vma)->vm_end - (*vma)->vm_start); - return -EINVAL; - } - - (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); - (*vma)->vm_flags |= VM_IO | VM_RESERVED; - - ret = remap_pfn_range((*vma), (*vma)->vm_start, - physical >> PAGE_SHIFT, vsize, - (*vma)->vm_page_prot); - if (ret) { - ehca_gen_err("remap_pfn_range() failed ret=%x", ret); - return -ENOMEM; - } - - return 0; - -} - -int ehca_munmap(unsigned long addr, size_t len) { - int ret = 0; - struct mm_struct *mm = current->mm; - if (mm) { - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - } - return ret; -} diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index 7dc10551cf18..ec2e603ea241 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o ib_ipath-y := \ ipath_cq.o \ ipath_diag.o \ + ipath_dma.o \ ipath_driver.o \ ipath_eeprom.o \ ipath_file_ops.o \ diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c new file mode 100644 index 000000000000..6e0f2b8918ce --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2006 QLogic, Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_verbs.h> + +#include "ipath_verbs.h" + +#define BAD_DMA_ADDRESS ((u64) 0) + +/* + * The following functions implement driver specific replacements + * for the ib_dma_*() functions. + * + * These functions return kernel virtual addresses instead of + * device bus addresses since the driver uses the CPU to copy + * data instead of using hardware DMA. + */ + +static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == BAD_DMA_ADDRESS; +} + +static u64 ipath_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return (u64) cpu_addr; +} + +static void ipath_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 ipath_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + BUG_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = BAD_DMA_ADDRESS; + goto done; + } + + addr = (u64) page_address(page); + if (addr) + addr += offset; + /* TODO: handle highmem pages */ + +done: + return addr; +} + +static void ipath_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + u64 addr; + int i; + int ret = nents; + + BUG_ON(!valid_dma_direction(direction)); + + for (i = 0; i < nents; i++) { + addr = (u64) page_address(sg[i].page); + /* TODO: handle highmem pages */ + if (!addr) { + ret = 0; + break; + } + } + return ret; +} + +static void ipath_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) +{ + u64 addr = (u64) page_address(sg->page); + + if (addr) + addr += sg->offset; + return addr; +} + +static unsigned int ipath_sg_dma_len(struct ib_device *dev, + struct scatterlist *sg) +{ + return sg->length; +} + +static void ipath_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void ipath_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + if (dma_handle) + *dma_handle = (u64) addr; + return addr; +} + +static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops ipath_dma_mapping_ops = { + ipath_mapping_error, + ipath_dma_map_single, + ipath_dma_unmap_single, + ipath_dma_map_page, + ipath_dma_unmap_page, + ipath_map_sg, + ipath_unmap_sg, + ipath_sg_dma_address, + ipath_sg_dma_len, + ipath_sync_single_for_cpu, + ipath_sync_single_for_device, + ipath_dma_alloc_coherent, + ipath_dma_free_coherent +}; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 1aeddb48e355..ae7f21a0cdc0 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1825,8 +1825,6 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, */ void ipath_shutdown_device(struct ipath_devdata *dd) { - u64 val; - ipath_dbg("Shutting down the device\n"); dd->ipath_flags |= IPATH_LINKUNK; @@ -1849,7 +1847,7 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL); /* flush it */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); /* * enough for anything that's going to trickle out to have actually * done so. diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index a9ddc6911f66..b932bcb67a5e 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -699,7 +699,6 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, int start_stop) { struct ipath_devdata *dd = pd->port_dd; - u64 tval; ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", start_stop ? "en" : "dis", dd->ipath_unit, @@ -729,7 +728,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); /* now be sure chip saw it before we return */ - tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); if (start_stop) { /* * And try to be sure that tail reg update has happened too. @@ -738,7 +737,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, * in memory copy, since we could overwrite an update by the * chip if we did. */ - tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); + ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); } /* always; new head should be equal to new tail; see above */ bail: @@ -1745,9 +1744,9 @@ static int ipath_assign_port(struct file *fp, goto done; } - i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE; + i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)fp->f_dentry->d_inode->i_rdev, i_minor); + (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); if (i_minor) ret = find_free_port(i_minor - 1, fp, uinfo); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index d9ff283f725e..79a60f020a21 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -118,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->i_private; + dd = file->f_path.dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -138,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->i_private; + dd = file->f_path.dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -177,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->i_private; + dd = file->f_path.dentry->d_inode->i_private; /* so we only initialize non-zero fields. */ memset(portinfo, 0, sizeof portinfo); @@ -324,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->i_private; + dd = file->f_path.dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -377,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->i_private; + dd = file->f_path.dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index e57c7a351cb5..7468477ba837 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1447,7 +1447,7 @@ static void ipath_ht_tidtemplate(struct ipath_devdata *dd) static int ipath_ht_early_init(struct ipath_devdata *dd) { u32 __iomem *piobuf; - u32 pioincr, val32, egrsize; + u32 pioincr, val32; int i; /* @@ -1467,7 +1467,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) * errors interrupts if we ever see one). */ dd->ipath_rcvegrbufsize = dd->ipath_piosize2k; - egrsize = dd->ipath_rcvegrbufsize; /* * the min() check here is currently a nop, but it may not diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 6af89683f710..ae8bf9950c6d 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -602,7 +602,7 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) */ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) { - u64 val, tmp, config1, prev_val; + u64 val, config1, prev_val; int ret = 0; ipath_dbg("Trying to bringup serdes\n"); @@ -633,7 +633,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) | INFINIPATH_SERDC0_L1PWR_DN; ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); /* be sure chip saw it */ - tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); udelay(5); /* need pll reset set at least for a bit */ /* * after PLL is reset, set the per-lane Resets and TxIdle and @@ -647,7 +647,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) "and txidle (%llx)\n", (unsigned long long) val); ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); /* be sure chip saw it */ - tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); /* need PLL reset clear for at least 11 usec before lane * resets cleared; give it a few more to be sure */ udelay(15); @@ -851,12 +851,12 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd, int pos, ret; dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ - dd->ipath_irq = pdev->irq; ret = pci_enable_msi(dd->pcidev); if (ret) ipath_dev_err(dd, "pci_enable_msi failed: %d, " "interrupts may not work\n", ret); /* continue even if it fails, we may still be OK... */ + dd->ipath_irq = pdev->irq; if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) { u16 control; diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index d819cca524cd..d4f6b5239ef8 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -347,10 +347,9 @@ done: static int init_chip_reset(struct ipath_devdata *dd, struct ipath_portdata **pdp) { - struct ipath_portdata *pd; u32 rtmp; - *pdp = pd = dd->ipath_pd[0]; + *pdp = dd->ipath_pd[0]; /* ensure chip does no sends or receives while we re-initialize */ dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U; ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 5652a550d442..72b9e279d19d 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -598,10 +598,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * on close */ if (errs & INFINIPATH_E_RRCVHDRFULL) { - int any; u32 hd, tl; ipath_stats.sps_hdrqfull++; - for (any = i = 0; i < dd->ipath_cfgports; i++) { + for (i = 0; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; if (i == 0) { hd = dd->ipath_port0head; diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 9a6cbd05adcd..851763d7d2db 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -134,7 +134,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, */ if (sge->lkey == 0) { isge->mr = NULL; - isge->vaddr = bus_to_virt(sge->addr); + isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; ret = 1; @@ -202,12 +202,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, int ret; /* - * We use RKEY == zero for physical addresses - * (see ipath_get_dma_mr). + * We use RKEY == zero for kernel virtual addresses + * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { sge->mr = NULL; - sge->vaddr = phys_to_virt(vaddr); + sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; ss->sg_list = NULL; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index a0673c1eef71..8cc8598d6c69 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -54,6 +54,8 @@ static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) * @acc: access flags * * Returns the memory region on success, otherwise returns an errno. + * Note that all DMA addresses should be created via the + * struct ib_dma_mapping_ops functions (see ipath_dma.c). */ struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) { @@ -149,8 +151,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, m = 0; n = 0; for (i = 0; i < num_phys_buf; i++) { - mr->mr.map[m]->segs[n].vaddr = - phys_to_virt(buffer_list[i].addr); + mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; mr->mr.map[m]->segs[n].length = buffer_list[i].size; mr->mr.length += buffer_list[i].size; n++; @@ -347,7 +348,7 @@ int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, n = 0; ps = 1 << fmr->page_shift; for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]); + fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; fmr->mr.map[m]->segs[n].length = ps; if (++n == IPATH_SEGSZ) { m++; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 46c1c89bf6ae..64f07b19349f 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -379,7 +379,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = 0; wc.wc_flags = 0; wc.pkey_index = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index ce6038743c5c..5ff20cb04494 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -702,7 +702,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc->vendor_err = 0; wc->byte_len = 0; - wc->qp_num = qp->ibqp.qp_num; + wc->qp = &qp->ibqp; wc->src_qp = qp->remote_qpn; wc->pkey_index = 0; wc->slid = qp->remote_ah_attr.dlid; @@ -836,7 +836,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; @@ -951,7 +951,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; @@ -1511,7 +1511,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index f7530512045d..e86cb171872e 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -137,7 +137,7 @@ bad_lkey: wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = 0; wc.wc_flags = 0; wc.pkey_index = 0; @@ -336,7 +336,7 @@ again: wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = 0; - wc.qp_num = sqp->ibqp.qp_num; + wc.qp = &sqp->ibqp; wc.src_qp = sqp->remote_qpn; wc.pkey_index = 0; wc.slid = sqp->remote_ah_attr.dlid; @@ -426,7 +426,7 @@ again: wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; /* XXX do we know which pkey matched? Only needed for GSI. */ wc.pkey_index = 0; @@ -447,7 +447,7 @@ send_comp: wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.qp_num = sqp->ibqp.qp_num; + wc.qp = &sqp->ibqp; wc.src_qp = 0; wc.pkey_index = 0; wc.slid = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 182de34f9f47..ffa6318ad0cc 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -215,7 +215,6 @@ static ssize_t store_mlid(struct device *dev, size_t count) { struct ipath_devdata *dd = dev_get_drvdata(dev); - int unit; u16 mlid; int ret; @@ -223,8 +222,6 @@ static ssize_t store_mlid(struct device *dev, if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE) goto invalid; - unit = dd->ipath_unit; - dd->ipath_mlid = mlid; goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index e636cfd67a82..325d6634ff53 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -49,7 +49,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc->vendor_err = 0; wc->byte_len = wqe->length; - wc->qp_num = qp->ibqp.qp_num; + wc->qp = &qp->ibqp; wc->src_qp = qp->remote_qpn; wc->pkey_index = 0; wc->slid = qp->remote_ah_attr.dlid; @@ -411,7 +411,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 49f1102af8b3..9a3e54664ee4 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -66,7 +66,7 @@ bad_lkey: wc.vendor_err = 0; wc.byte_len = 0; wc.imm_data = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = 0; wc.wc_flags = 0; wc.pkey_index = 0; @@ -255,7 +255,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->status = IB_WC_SUCCESS; wc->opcode = IB_WC_RECV; wc->vendor_err = 0; - wc->qp_num = qp->ibqp.qp_num; + wc->qp = &qp->ibqp; wc->src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ wc->pkey_index = 0; @@ -474,7 +474,7 @@ done: wc.vendor_err = 0; wc.opcode = IB_WC_SEND; wc.byte_len = len; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = 0; wc.wc_flags = 0; /* XXX initialize other fields? */ @@ -651,7 +651,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; - wc.qp_num = qp->ibqp.qp_num; + wc.qp = &qp->ibqp; wc.src_qp = src_qp; /* XXX do we know which pkey matched? Only needed for GSI. */ wc.pkey_index = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index acdee33ee1f8..2aaacdb7e52a 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1599,6 +1599,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->detach_mcast = ipath_multicast_detach; dev->process_mad = ipath_process_mad; dev->mmap = ipath_mmap; + dev->dma_ops = &ipath_dma_mapping_ops; snprintf(dev->node_desc, sizeof(dev->node_desc), IPATH_IDSTR " %s", init_utsname()->nodename); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 8039f6e5f0c8..c0c8d5b24a7d 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -812,4 +812,6 @@ extern unsigned int ib_ipath_max_srq_wrs; extern const u32 ib_ipath_rnr_table[]; +extern struct ib_dma_mapping_ops ipath_dma_mapping_ops; + #endif /* IPATH_VERBS_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 768df7265b81..968d1519761c 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1854,7 +1854,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, memset(inbox + 256, 0, 256); - MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET); MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); val = in_wc->sl << 4; diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 283d50b76c3d..efd79ef109a6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -54,6 +54,10 @@ enum { MTHCA_CQ_ENTRY_SIZE = 0x20 }; +enum { + MTHCA_ATOMIC_BYTE_LEN = 8 +}; + /* * Must be packed because start is 64 bits but only aligned to 32 bits. */ @@ -530,7 +534,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, } } - entry->qp_num = (*cur_qp)->qpn; + entry->qp = &(*cur_qp)->ibqp; if (is_send) { wq = &(*cur_qp)->sq; @@ -599,11 +603,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev, break; case MTHCA_OPCODE_ATOMIC_CS: entry->opcode = IB_WC_COMP_SWAP; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_ATOMIC_FA: entry->opcode = IB_WC_FETCH_ADD; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; case MTHCA_OPCODE_BIND_MW: entry->opcode = IB_WC_BIND_MW; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 0491ec7a7c0a..44bc6cc734ab 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -80,24 +80,61 @@ static int tune_pci = 0; module_param(tune_pci, int, 0444); MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); -struct mutex mthca_device_mutex; +DEFINE_MUTEX(mthca_device_mutex); + +#define MTHCA_DEFAULT_NUM_QP (1 << 16) +#define MTHCA_DEFAULT_RDB_PER_QP (1 << 2) +#define MTHCA_DEFAULT_NUM_CQ (1 << 16) +#define MTHCA_DEFAULT_NUM_MCG (1 << 13) +#define MTHCA_DEFAULT_NUM_MPT (1 << 17) +#define MTHCA_DEFAULT_NUM_MTT (1 << 20) +#define MTHCA_DEFAULT_NUM_UDAV (1 << 15) +#define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18) +#define MTHCA_DEFAULT_NUM_UARC_SIZE (1 << 18) + +static struct mthca_profile hca_profile = { + .num_qp = MTHCA_DEFAULT_NUM_QP, + .rdb_per_qp = MTHCA_DEFAULT_RDB_PER_QP, + .num_cq = MTHCA_DEFAULT_NUM_CQ, + .num_mcg = MTHCA_DEFAULT_NUM_MCG, + .num_mpt = MTHCA_DEFAULT_NUM_MPT, + .num_mtt = MTHCA_DEFAULT_NUM_MTT, + .num_udav = MTHCA_DEFAULT_NUM_UDAV, /* Tavor only */ + .fmr_reserved_mtts = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */ + .uarc_size = MTHCA_DEFAULT_NUM_UARC_SIZE, /* Arbel only */ +}; + +module_param_named(num_qp, hca_profile.num_qp, int, 0444); +MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA"); + +module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444); +MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP"); + +module_param_named(num_cq, hca_profile.num_cq, int, 0444); +MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA"); + +module_param_named(num_mcg, hca_profile.num_mcg, int, 0444); +MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA"); + +module_param_named(num_mpt, hca_profile.num_mpt, int, 0444); +MODULE_PARM_DESC(num_mpt, + "maximum number of memory protection table entries per HCA"); + +module_param_named(num_mtt, hca_profile.num_mtt, int, 0444); +MODULE_PARM_DESC(num_mtt, + "maximum number of memory translation table segments per HCA"); + +module_param_named(num_udav, hca_profile.num_udav, int, 0444); +MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA"); + +module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); +MODULE_PARM_DESC(fmr_reserved_mtts, + "number of memory translation table segments reserved for FMR"); static const char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; -static struct mthca_profile default_profile = { - .num_qp = 1 << 16, - .rdb_per_qp = 4, - .num_cq = 1 << 16, - .num_mcg = 1 << 13, - .num_mpt = 1 << 17, - .num_mtt = 1 << 20, - .num_udav = 1 << 15, /* Tavor only */ - .fmr_reserved_mtts = 1 << 18, /* Tavor only */ - .uarc_size = 1 << 18, /* Arbel only */ -}; - static int mthca_tune_pci(struct mthca_dev *mdev) { int cap; @@ -303,7 +340,7 @@ static int mthca_init_tavor(struct mthca_dev *mdev) goto err_disable; } - profile = default_profile; + profile = hca_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.uarc_size = 0; if (mdev->mthca_flags & MTHCA_FLAG_SRQ) @@ -621,7 +658,7 @@ static int mthca_init_arbel(struct mthca_dev *mdev) goto err_stop_fw; } - profile = default_profile; + profile = hca_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.num_udav = 0; if (mdev->mthca_flags & MTHCA_FLAG_SRQ) @@ -1278,11 +1315,55 @@ static struct pci_driver mthca_driver = { .remove = __devexit_p(mthca_remove_one) }; +static void __init __mthca_check_profile_val(const char *name, int *pval, + int pval_default) +{ + /* value must be positive and power of 2 */ + int old_pval = *pval; + + if (old_pval <= 0) + *pval = pval_default; + else + *pval = roundup_pow_of_two(old_pval); + + if (old_pval != *pval) { + printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n", + old_pval, name); + printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval); + } +} + +#define mthca_check_profile_val(name, default) \ + __mthca_check_profile_val(#name, &hca_profile.name, default) + +static void __init mthca_validate_profile(void) +{ + mthca_check_profile_val(num_qp, MTHCA_DEFAULT_NUM_QP); + mthca_check_profile_val(rdb_per_qp, MTHCA_DEFAULT_RDB_PER_QP); + mthca_check_profile_val(num_cq, MTHCA_DEFAULT_NUM_CQ); + mthca_check_profile_val(num_mcg, MTHCA_DEFAULT_NUM_MCG); + mthca_check_profile_val(num_mpt, MTHCA_DEFAULT_NUM_MPT); + mthca_check_profile_val(num_mtt, MTHCA_DEFAULT_NUM_MTT); + mthca_check_profile_val(num_udav, MTHCA_DEFAULT_NUM_UDAV); + mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS); + + if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) { + printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n", + hca_profile.fmr_reserved_mtts); + printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n", + hca_profile.num_mtt); + hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2; + printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", + hca_profile.fmr_reserved_mtts); + } +} + static int __init mthca_init(void) { int ret; - mutex_init(&mthca_device_mutex); + mthca_validate_profile(); + ret = mthca_catas_init(); if (ret) return ret; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 15cc2f6eb475..6b19645d946c 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -232,7 +232,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { - if (chunk->mem[i].length >= offset) { + if (chunk->mem[i].length > offset) { page = chunk->mem[i].page; goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 21422a3336ad..7b96751695ea 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -124,7 +124,7 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_map_per_fmr = 255; else props->max_map_per_fmr = - (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1; + (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; err = 0; out: @@ -816,7 +816,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda lkey = ucmd.lkey; } - ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status); + ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status); if (status) ret = -EINVAL; @@ -1100,10 +1100,11 @@ static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct mthca_fmr *fmr; int err; - fmr = kmemdup(fmr_attr, sizeof *fmr, GFP_KERNEL); + fmr = kmalloc(sizeof *fmr, GFP_KERNEL); if (!fmr) return ERR_PTR(-ENOMEM); + memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr); err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, convert_access(mr_access_flags), fmr); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 33e3ba7937f1..5f5214c0337d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -429,13 +429,18 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); - int err; - struct mthca_mailbox *mailbox; + int err = 0; + struct mthca_mailbox *mailbox = NULL; struct mthca_qp_param *qp_param; struct mthca_qp_context *context; int mthca_state; u8 status; + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -454,7 +459,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m mthca_state = be32_to_cpu(context->flags) >> 28; qp_attr->qp_state = to_ib_qp_state(mthca_state); - qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->path_mtu = context->mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); @@ -464,11 +468,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff; qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context->params2)); - qp_attr->cap.max_send_wr = qp->sq.max; - qp_attr->cap.max_recv_wr = qp->rq.max; - qp_attr->cap.max_send_sge = qp->sq.max_gs; - qp_attr->cap.max_recv_sge = qp->rq.max_gs; - qp_attr->cap.max_inline_data = qp->max_inline_data; if (qp->transport == RC || qp->transport == UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); @@ -495,7 +494,16 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; qp_attr->alt_timeout = context->alt_path.ackto >> 3; - qp_init_attr->cap = qp_attr->cap; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_send_wr = qp->sq.max; + qp_attr->cap.max_recv_wr = qp->rq.max; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + qp_init_attr->cap = qp_attr->cap; out: mthca_free_mailbox(dev, mailbox); @@ -636,11 +644,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (mthca_is_memfree(dev)) { if (qp->rq.max) - qp_context->rq_size_stride = long_log2(qp->rq.max) << 3; + qp_context->rq_size_stride = ilog2(qp->rq.max) << 3; qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; if (qp->sq.max) - qp_context->sq_size_stride = long_log2(qp->sq.max) << 3; + qp_context->sq_size_stride = ilog2(qp->sq.max) << 3; qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; } diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 34d2c4768962..10684da33d58 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -120,7 +120,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, memset(context, 0, sizeof *context); - logsize = long_log2(srq->max); + logsize = ilog2(srq->max); context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); @@ -213,7 +213,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz)) return -EINVAL; - srq->wqe_shift = long_log2(ds); + srq->wqe_shift = ilog2(ds); srq->srqn = mthca_alloc(&dev->srq_table.alloc); if (srq->srqn == -1) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 99547996aba2..07deee8f81ce 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -105,12 +105,12 @@ struct ipoib_mcast; struct ipoib_rx_buf { struct sk_buff *skb; - dma_addr_t mapping; + u64 mapping; }; struct ipoib_tx_buf { struct sk_buff *skb; - DECLARE_PCI_UNMAP_ADDR(mapping) + u64 mapping; }; /* diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f10fba5d3265..59d9594ed6d9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -109,9 +109,8 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) ret = ib_post_recv(priv->qp, ¶m, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - dma_unmap_single(priv->ca->dma_device, - priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } @@ -123,7 +122,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - dma_addr_t addr; + u64 addr; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); if (!skb) @@ -136,10 +135,9 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = dma_map_single(priv->ca->dma_device, - skb->data, IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(addr))) { + addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); return -EIO; } @@ -174,7 +172,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; - dma_addr_t addr; + u64 addr; ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n", wr_id, wc->opcode, wc->status); @@ -193,8 +191,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -212,8 +210,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -261,10 +258,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &priv->tx_ring[wr_id]; - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(tx_req, mapping), - tx_req->skb->len, - DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, + tx_req->skb->len, DMA_TO_DEVICE); ++priv->stats.tx_packets; priv->stats.tx_bytes += tx_req->skb->len; @@ -311,7 +306,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ib_ah *address, u32 qpn, - dma_addr_t addr, int len) + u64 addr, int len) { struct ib_send_wr *bad_wr; @@ -330,7 +325,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_tx_buf *tx_req; - dma_addr_t addr; + u64 addr; if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -353,21 +348,20 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, */ tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)]; tx_req->skb = skb; - addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(addr))) { + addr = ib_dma_map_single(priv->ca, skb->data, skb->len, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { ++priv->stats.tx_errors; dev_kfree_skb_any(skb); return; } - pci_unmap_addr_set(tx_req, mapping, addr); + tx_req->mapping = addr; if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address->ah, qpn, addr, skb->len))) { ipoib_warn(priv, "post_send failed\n"); ++priv->stats.tx_errors; - dma_unmap_single(priv->ca->dma_device, addr, skb->len, - DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); } else { dev->trans_start = jiffies; @@ -538,24 +532,27 @@ int ipoib_ib_dev_stop(struct net_device *dev) while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)]; - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(tx_req, mapping), - tx_req->skb->len, - DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, + tx_req->mapping, + tx_req->skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; } - for (i = 0; i < ipoib_recvq_size; ++i) - if (priv->rx_ring[i].skb) { - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(&priv->rx_ring[i], - mapping), - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - dev_kfree_skb_any(priv->rx_ring[i].skb); - priv->rx_ring[i].skb = NULL; - } + for (i = 0; i < ipoib_recvq_size; ++i) { + struct ipoib_rx_buf *rx_req; + + rx_req = &priv->rx_ring[i]; + if (!rx_req->skb) + continue; + ib_dma_unmap_single(priv->ca, + rx_req->mapping, + IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_req->skb); + rx_req->skb = NULL; + } goto timeout; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c09280243726..af5ee2ec4499 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -497,8 +497,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) return; } - skb_queue_head_init(&neigh->queue); - /* * We can only be called from ipoib_start_xmit, so we're * inside tx_lock -- no need to save/restore flags. @@ -806,6 +804,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) neigh->neighbour = neighbour; *to_ipoib_neigh(neighbour) = neigh; + skb_queue_head_init(&neigh->queue); return neigh; } @@ -959,16 +958,17 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) return netdev_priv(dev); } -static ssize_t show_pkey(struct class_device *cdev, char *buf) +static ssize_t show_pkey(struct device *dev, + struct device_attribute *attr, char *buf) { - struct ipoib_dev_priv *priv = - netdev_priv(container_of(cdev, struct net_device, class_dev)); + struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); return sprintf(buf, "0x%04x\n", priv->pkey); } -static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); -static ssize_t create_child(struct class_device *cdev, +static ssize_t create_child(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int pkey; @@ -986,14 +986,14 @@ static ssize_t create_child(struct class_device *cdev, */ pkey |= 0x8000; - ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), - pkey); + ret = ipoib_vlan_add(to_net_dev(dev), pkey); return ret ? ret : count; } -static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child); +static DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child); -static ssize_t delete_child(struct class_device *cdev, +static ssize_t delete_child(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int pkey; @@ -1005,18 +1005,16 @@ static ssize_t delete_child(struct class_device *cdev, if (pkey < 0 || pkey > 0xffff) return -EINVAL; - ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev), - pkey); + ret = ipoib_vlan_delete(to_net_dev(dev), pkey); return ret ? ret : count; } -static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child); +static DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child); int ipoib_add_pkey_attr(struct net_device *dev) { - return class_device_create_file(&dev->class_dev, - &class_device_attr_pkey); + return device_create_file(&dev->dev, &dev_attr_pkey); } static struct net_device *ipoib_add_port(const char *format, @@ -1084,11 +1082,9 @@ static struct net_device *ipoib_add_port(const char *format, if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; - if (class_device_create_file(&priv->dev->class_dev, - &class_device_attr_create_child)) + if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) goto sysfs_failed; - if (class_device_create_file(&priv->dev->class_dev, - &class_device_attr_delete_child)) + if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) goto sysfs_failed; return priv->dev; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index f887780e8093..085eafe6667c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -42,15 +42,15 @@ #include "ipoib.h" -static ssize_t show_parent(struct class_device *class_dev, char *buf) +static ssize_t show_parent(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_device *dev = - container_of(class_dev, struct net_device, class_dev); + struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = netdev_priv(dev); return sprintf(buf, "%s\n", priv->parent->name); } -static CLASS_DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { @@ -118,8 +118,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; - if (class_device_create_file(&priv->dev->class_dev, - &class_device_attr_parent)) + if (device_create_file(&priv->dev->dev, &dev_attr_parent)) goto sysfs_failed; list_add_tail(&priv->list, &ppriv->child_intfs); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9b2041e25d59..dd221eda3ea6 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -177,7 +177,7 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, * - if yes, the mtask is recycled at iscsi_complete_pdu * - if no, the mtask is recycled at iser_snd_completion */ - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; @@ -241,7 +241,7 @@ iscsi_iser_ctask_xmit(struct iscsi_conn *conn, error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); iscsi_iser_ctask_xmit_exit: - if (error && error != -EAGAIN) + if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 234e5b061a75..cae8c96a55f8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -182,7 +182,7 @@ struct iser_regd_buf { struct iser_mem_reg reg; /* memory registration info */ void *virt_addr; struct iser_device *device; /* device->device for dma_unmap */ - dma_addr_t dma_addr; /* if non zero, addr for dma_unmap */ + u64 dma_addr; /* if non zero, addr for dma_unmap */ enum dma_data_direction direction; /* direction for dma_unmap */ unsigned int data_size; atomic_t ref_count; /* refcount, freed when dec to 0 */ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 9b3d79c796c8..89e37283c836 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -304,18 +304,14 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) static int iser_check_xmit(struct iscsi_conn *conn, void *task) { - int rc = 0; struct iscsi_iser_conn *iser_conn = conn->dd_data; - write_lock_bh(conn->recv_lock); if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == ISER_QP_MAX_REQ_DTOS) { - iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - rc = -EAGAIN; + iser_dbg("%ld can't xmit task %p\n",jiffies,task); + return -ENOBUFS; } - write_unlock_bh(conn->recv_lock); - return rc; + return 0; } @@ -340,7 +336,7 @@ int iser_send_command(struct iscsi_conn *conn, return -EPERM; } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; edtl = ntohl(hdr->data_length); @@ -426,7 +422,7 @@ int iser_send_data_out(struct iscsi_conn *conn, } if (iser_check_xmit(conn, ctask)) - return -EAGAIN; + return -ENOBUFS; itt = ntohl(hdr->itt); data_seg_len = ntoh24(hdr->dlength); @@ -487,10 +483,8 @@ int iser_send_control(struct iscsi_conn *conn, struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iser_desc *mdesc = mtask->dd_data; struct iser_dto *send_dto = NULL; - unsigned int itt; unsigned long data_seg_len; int err = 0; - unsigned char opcode; struct iser_regd_buf *regd_buf; struct iser_device *device; @@ -500,7 +494,7 @@ int iser_send_control(struct iscsi_conn *conn, } if (iser_check_xmit(conn,mtask)) - return -EAGAIN; + return -ENOBUFS; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; @@ -512,8 +506,6 @@ int iser_send_control(struct iscsi_conn *conn, iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); - itt = ntohl(mtask->hdr->itt); - opcode = mtask->hdr->opcode & ISCSI_OPCODE_MASK; data_seg_len = ntoh24(mtask->hdr->dlength); if (data_seg_len > 0) { @@ -575,7 +567,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, opcode = hdr->opcode & ISCSI_OPCODE_MASK; if (opcode == ISCSI_OP_SCSI_CMD_RSP) { - itt = hdr->itt & ISCSI_ITT_MASK; /* mask out cid and age bits */ + itt = get_itt(hdr->itt); /* mask out cid and age bits */ if (!(itt < session->cmds_max)) iser_err("itt can't be matched to task!!!" "conn %p opcode %d cmds_max %d itt %d\n", @@ -609,6 +601,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; + int resume_tx = 0; iser_dbg("Initiator, Data sent dto=0x%p\n", dto); @@ -617,21 +610,22 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) + resume_tx = 1; + atomic_dec(&ib_conn->post_send_buf_count); - write_lock(conn->recv_lock); - if (conn->suspend_tx) { + if (resume_tx) { iser_dbg("%ld resuming tx\n",jiffies); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); scsi_queue_work(conn->session->host, &conn->xmitwork); } - write_unlock(conn->recv_lock); if (tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ mtask = (void *) ((long)(void *)tx_desc - sizeof(struct iscsi_mgmt_task)); - if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) { + if (mtask->hdr->itt == RESERVED_ITT) { struct iscsi_session *session = conn->session; spin_lock(&conn->session->lock); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 5e122501fd80..fc9f1fd0ae54 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -52,7 +52,7 @@ */ int iser_regd_buff_release(struct iser_regd_buf *regd_buf) { - struct device *dma_device; + struct ib_device *dev; if ((atomic_read(®d_buf->ref_count) == 0) || atomic_dec_and_test(®d_buf->ref_count)) { @@ -61,8 +61,8 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf) iser_unreg_mem(®d_buf->reg); if (regd_buf->dma_addr) { - dma_device = regd_buf->device->ib_device->dma_device; - dma_unmap_single(dma_device, + dev = regd_buf->device->ib_device; + ib_dma_unmap_single(dev, regd_buf->dma_addr, regd_buf->data_size, regd_buf->direction); @@ -84,12 +84,12 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction) { - dma_addr_t dma_addr; + u64 dma_addr; - dma_addr = dma_map_single(device->ib_device->dma_device, - regd_buf->virt_addr, - regd_buf->data_size, direction); - BUG_ON(dma_mapping_error(dma_addr)); + dma_addr = ib_dma_map_single(device->ib_device, + regd_buf->virt_addr, + regd_buf->data_size, direction); + BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr)); regd_buf->reg.lkey = device->mr->lkey; regd_buf->reg.len = regd_buf->data_size; @@ -107,14 +107,14 @@ int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { int dma_nents; - struct device *dma_device; + struct ib_device *dev; char *mem = NULL; struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; unsigned long cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) mem = (void *)__get_free_pages(GFP_NOIO, - long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else mem = kmalloc(cmd_data_len, GFP_NOIO); @@ -147,17 +147,12 @@ int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, iser_ctask->data_copy[cmd_dir].copy_buf = mem; - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - if (cmd_dir == ISER_DIR_OUT) - dma_nents = dma_map_sg(dma_device, - &iser_ctask->data_copy[cmd_dir].sg_single, - 1, DMA_TO_DEVICE); - else - dma_nents = dma_map_sg(dma_device, - &iser_ctask->data_copy[cmd_dir].sg_single, - 1, DMA_FROM_DEVICE); - + dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + dma_nents = ib_dma_map_sg(dev, + &iser_ctask->data_copy[cmd_dir].sg_single, + 1, + (cmd_dir == ISER_DIR_OUT) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; @@ -170,19 +165,16 @@ int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { - struct device *dma_device; + struct ib_device *dev; struct iser_data_buf *mem_copy; unsigned long cmd_data_len; - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - mem_copy = &iser_ctask->data_copy[cmd_dir]; + dev = iser_ctask->iser_conn->ib_conn->device->ib_device; + mem_copy = &iser_ctask->data_copy[cmd_dir]; - if (cmd_dir == ISER_DIR_OUT) - dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, - DMA_TO_DEVICE); - else - dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, - DMA_FROM_DEVICE); + ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, + (cmd_dir == ISER_DIR_OUT) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (cmd_dir == ISER_DIR_IN) { char *mem; @@ -211,7 +203,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, if (cmd_data_len > ISER_KMALLOC_THRESHOLD) free_pages((unsigned long)mem_copy->copy_buf, - long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else kfree(mem_copy->copy_buf); @@ -231,11 +223,12 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, * consecutive elements. Also, it handles one entry SG. */ static int iser_sg_to_page_vec(struct iser_data_buf *data, - struct iser_page_vec *page_vec) + struct iser_page_vec *page_vec, + struct ib_device *ibdev) { struct scatterlist *sg = (struct scatterlist *)data->buf; - dma_addr_t first_addr, last_addr, page; - int start_aligned, end_aligned; + u64 first_addr, last_addr, page; + int end_aligned; unsigned int cur_page = 0; unsigned long total_sz = 0; int i; @@ -244,19 +237,21 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, page_vec->offset = (u64) sg[0].offset & ~MASK_4K; for (i = 0; i < data->dma_nents; i++) { - total_sz += sg_dma_len(&sg[i]); + unsigned int dma_len = ib_sg_dma_len(ibdev, &sg[i]); + + total_sz += dma_len; - first_addr = sg_dma_address(&sg[i]); - last_addr = first_addr + sg_dma_len(&sg[i]); + first_addr = ib_sg_dma_address(ibdev, &sg[i]); + last_addr = first_addr + dma_len; - start_aligned = !(first_addr & ~MASK_4K); end_aligned = !(last_addr & ~MASK_4K); /* continue to collect page fragments till aligned or SG ends */ while (!end_aligned && (i + 1 < data->dma_nents)) { i++; - total_sz += sg_dma_len(&sg[i]); - last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); + dma_len = ib_sg_dma_len(ibdev, &sg[i]); + total_sz += dma_len; + last_addr = ib_sg_dma_address(ibdev, &sg[i]) + dma_len; end_aligned = !(last_addr & ~MASK_4K); } @@ -288,10 +283,11 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, * the number of entries which are aligned correctly. Supports the case where * consecutive SG elements are actually fragments of the same physcial page. */ -static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data) +static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, + struct ib_device *ibdev) { struct scatterlist *sg; - dma_addr_t end_addr, next_addr; + u64 end_addr, next_addr; int i, cnt; unsigned int ret_len = 0; @@ -303,12 +299,12 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data) (unsigned long)page_to_phys(sg[i].page), (unsigned long)sg[i].offset, (unsigned long)sg[i].length); */ - end_addr = sg_dma_address(&sg[i]) + - sg_dma_len(&sg[i]); + end_addr = ib_sg_dma_address(ibdev, &sg[i]) + + ib_sg_dma_len(ibdev, &sg[i]); /* iser_dbg("Checking sg iobuf end address " "0x%08lX\n", end_addr); */ if (i + 1 < data->dma_nents) { - next_addr = sg_dma_address(&sg[i+1]); + next_addr = ib_sg_dma_address(ibdev, &sg[i+1]); /* are i, i+1 fragments of the same page? */ if (end_addr == next_addr) continue; @@ -325,7 +321,8 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data) return ret_len; } -static void iser_data_buf_dump(struct iser_data_buf *data) +static void iser_data_buf_dump(struct iser_data_buf *data, + struct ib_device *ibdev) { struct scatterlist *sg = (struct scatterlist *)data->buf; int i; @@ -333,9 +330,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data) for (i = 0; i < data->dma_nents; i++) iser_err("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", - i, (unsigned long)sg_dma_address(&sg[i]), + i, (unsigned long)ib_sg_dma_address(ibdev, &sg[i]), sg[i].page, sg[i].offset, - sg[i].length,sg_dma_len(&sg[i])); + sg[i].length, ib_sg_dma_len(ibdev, &sg[i])); } static void iser_dump_page_vec(struct iser_page_vec *page_vec) @@ -349,7 +346,8 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec) } static void iser_page_vec_build(struct iser_data_buf *data, - struct iser_page_vec *page_vec) + struct iser_page_vec *page_vec, + struct ib_device *ibdev) { int page_vec_len = 0; @@ -357,14 +355,14 @@ static void iser_page_vec_build(struct iser_data_buf *data, page_vec->offset = 0; iser_dbg("Translating sg sz: %d\n", data->dma_nents); - page_vec_len = iser_sg_to_page_vec(data,page_vec); + page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev); iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len); page_vec->length = page_vec_len; if (page_vec_len * SIZE_4K < page_vec->data_size) { iser_err("page_vec too short to hold this SG\n"); - iser_data_buf_dump(data); + iser_data_buf_dump(data, ibdev); iser_dump_page_vec(page_vec); BUG(); } @@ -375,13 +373,12 @@ int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir) { - struct device *dma_device; + struct ib_device *dev; iser_ctask->dir[iser_dir] = 1; - dma_device = - iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + dev = iser_ctask->iser_conn->ib_conn->device->ib_device; - data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); + data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { iser_err("dma_map_sg failed!!!\n"); return -EINVAL; @@ -391,20 +388,19 @@ int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) { - struct device *dma_device; + struct ib_device *dev; struct iser_data_buf *data; - dma_device = - iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + dev = iser_ctask->iser_conn->ib_conn->device->ib_device; if (iser_ctask->dir[ISER_DIR_IN]) { data = &iser_ctask->data[ISER_DIR_IN]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); + ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } if (iser_ctask->dir[ISER_DIR_OUT]) { data = &iser_ctask->data[ISER_DIR_OUT]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); + ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); } } @@ -419,6 +415,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, { struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; + struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; @@ -428,11 +425,11 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, regd_buf = &iser_ctask->rdma_regd[cmd_dir]; - aligned_len = iser_data_buf_aligned_len(mem); + aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); - iser_data_buf_dump(mem); + iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ iser_dma_unmap_task_data(iser_ctask); @@ -450,8 +447,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, regd_buf->reg.lkey = device->mr->lkey; regd_buf->reg.rkey = device->mr->rkey; - regd_buf->reg.len = sg_dma_len(&sg[0]); - regd_buf->reg.va = sg_dma_address(&sg[0]); + regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); + regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); regd_buf->reg.is_fmr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " @@ -461,10 +458,10 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->page_vec); + iser_page_vec_build(mem, ib_conn->page_vec, ibdev); err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); if (err) { - iser_data_buf_dump(mem); + iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, ntoh24(iser_ctask->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a6289595557b..5e8ac577f0ad 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -122,9 +122,8 @@ static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, if (!iu->buf) goto out_free_iu; - iu->dma = dma_map_single(host->dev->dev->dma_device, - iu->buf, size, direction); - if (dma_mapping_error(iu->dma)) + iu->dma = ib_dma_map_single(host->dev->dev, iu->buf, size, direction); + if (ib_dma_mapping_error(host->dev->dev, iu->dma)) goto out_free_buf; iu->size = size; @@ -145,8 +144,7 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) if (!iu) return; - dma_unmap_single(host->dev->dev->dma_device, - iu->dma, iu->size, iu->direction); + ib_dma_unmap_single(host->dev->dev, iu->dma, iu->size, iu->direction); kfree(iu->buf); kfree(iu); } @@ -482,8 +480,8 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, scat = &req->fake_sg; } - dma_unmap_sg(target->srp_host->dev->dev->dma_device, scat, nents, - scmnd->sc_data_direction); + ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents, + scmnd->sc_data_direction); } static void srp_remove_req(struct srp_target_port *target, struct srp_request *req) @@ -550,6 +548,7 @@ static int srp_reconnect_target(struct srp_target_port *target) target->tx_head = 0; target->tx_tail = 0; + target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) goto err; @@ -595,23 +594,26 @@ static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, int i, j; int ret; struct srp_device *dev = target->srp_host->dev; + struct ib_device *ibdev = dev->dev; if (!dev->fmr_pool) return -ENODEV; - if ((sg_dma_address(&scat[0]) & ~dev->fmr_page_mask) && + if ((ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask) && mellanox_workarounds && !memcmp(&target->ioc_guid, mellanox_oui, 3)) return -EINVAL; len = page_cnt = 0; for (i = 0; i < sg_cnt; ++i) { - if (sg_dma_address(&scat[i]) & ~dev->fmr_page_mask) { + unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + + if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) { if (i > 0) return -EINVAL; else ++page_cnt; } - if ((sg_dma_address(&scat[i]) + sg_dma_len(&scat[i])) & + if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) & ~dev->fmr_page_mask) { if (i < sg_cnt - 1) return -EINVAL; @@ -619,7 +621,7 @@ static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, ++page_cnt; } - len += sg_dma_len(&scat[i]); + len += dma_len; } page_cnt += len >> dev->fmr_page_shift; @@ -631,10 +633,14 @@ static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, return -ENOMEM; page_cnt = 0; - for (i = 0; i < sg_cnt; ++i) - for (j = 0; j < sg_dma_len(&scat[i]); j += dev->fmr_page_size) + for (i = 0; i < sg_cnt; ++i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + + for (j = 0; j < dma_len; j += dev->fmr_page_size) dma_pages[page_cnt++] = - (sg_dma_address(&scat[i]) & dev->fmr_page_mask) + j; + (ib_sg_dma_address(ibdev, &scat[i]) & + dev->fmr_page_mask) + j; + } req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool, dma_pages, page_cnt, io_addr); @@ -644,7 +650,8 @@ static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, goto out; } - buf->va = cpu_to_be64(sg_dma_address(&scat[0]) & ~dev->fmr_page_mask); + buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) & + ~dev->fmr_page_mask); buf->key = cpu_to_be32(req->fmr->fmr->rkey); buf->len = cpu_to_be32(len); @@ -663,6 +670,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_cmd *cmd = req->cmd->buf; int len, nents, count; u8 fmt = SRP_DATA_DESC_DIRECT; + struct srp_device *dev; + struct ib_device *ibdev; if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) return sizeof (struct srp_cmd); @@ -687,8 +696,10 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen); } - count = dma_map_sg(target->srp_host->dev->dev->dma_device, - scat, nents, scmnd->sc_data_direction); + dev = target->srp_host->dev; + ibdev = dev->dev; + + count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); @@ -702,9 +713,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, */ struct srp_direct_buf *buf = (void *) cmd->add_data; - buf->va = cpu_to_be64(sg_dma_address(scat)); - buf->key = cpu_to_be32(target->srp_host->dev->mr->rkey); - buf->len = cpu_to_be32(sg_dma_len(scat)); + buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); + buf->key = cpu_to_be32(dev->mr->rkey); + buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); } else if (srp_map_fmr(target, scat, count, req, (void *) cmd->add_data)) { /* @@ -722,13 +733,14 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, count * sizeof (struct srp_direct_buf); for (i = 0; i < count; ++i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); + buf->desc_list[i].va = - cpu_to_be64(sg_dma_address(&scat[i])); + cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i])); buf->desc_list[i].key = - cpu_to_be32(target->srp_host->dev->mr->rkey); - buf->desc_list[i].len = - cpu_to_be32(sg_dma_len(&scat[i])); - datalen += sg_dma_len(&scat[i]); + cpu_to_be32(dev->mr->rkey); + buf->desc_list[i].len = cpu_to_be32(dma_len); + datalen += dma_len; } if (scmnd->sc_data_direction == DMA_TO_DEVICE) @@ -808,13 +820,15 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { + struct ib_device *dev; struct srp_iu *iu; u8 opcode; iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; - dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma, - target->max_ti_iu_len, DMA_FROM_DEVICE); + dev = target->srp_host->dev->dev; + ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len, + DMA_FROM_DEVICE); opcode = *(u8 *) iu->buf; @@ -850,8 +864,8 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) break; } - dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma, - target->max_ti_iu_len, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len, + DMA_FROM_DEVICE); } static void srp_completion(struct ib_cq *cq, void *target_ptr) @@ -865,6 +879,7 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) printk(KERN_ERR PFX "failed %s status %d\n", wc.wr_id & SRP_OP_RECV ? "receive" : "send", wc.status); + target->qp_in_error = 1; break; } @@ -969,6 +984,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, struct srp_request *req; struct srp_iu *iu; struct srp_cmd *cmd; + struct ib_device *dev; int len; if (target->state == SRP_TARGET_CONNECTING) @@ -985,8 +1001,9 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, if (!iu) goto err; - dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma, - srp_max_iu_len, DMA_TO_DEVICE); + dev = target->srp_host->dev->dev; + ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len, + DMA_TO_DEVICE); req = list_entry(target->free_reqs.next, struct srp_request, list); @@ -1018,8 +1035,8 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, goto err_unmap; } - dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma, - srp_max_iu_len, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len, + DMA_TO_DEVICE); if (__srp_post_send(target, iu, len)) { printk(KERN_ERR PFX "Send failed\n"); @@ -1322,6 +1339,8 @@ static int srp_abort(struct scsi_cmnd *scmnd) printk(KERN_ERR "SRP abort called\n"); + if (target->qp_in_error) + return FAILED; if (srp_find_req(target, scmnd, &req)) return FAILED; if (srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK)) @@ -1350,6 +1369,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) printk(KERN_ERR "SRP reset_device called\n"); + if (target->qp_in_error) + return FAILED; if (srp_find_req(target, scmnd, &req)) return FAILED; if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET)) @@ -1606,18 +1627,30 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) switch (token) { case SRP_OPT_ID_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_IOC_GUID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_DGID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } if (strlen(p) != 32) { printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); kfree(p); @@ -1641,6 +1674,10 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_SERVICE_ID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; @@ -1678,6 +1715,10 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_INITIATOR_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; @@ -1766,6 +1807,7 @@ static ssize_t srp_create_target(struct class_device *class_dev, goto err_free; } + target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) { printk(KERN_ERR PFX "Connection failed\n"); @@ -1883,7 +1925,7 @@ static void srp_add_one(struct ib_device *device) */ srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); srp_dev->fmr_page_size = 1 << srp_dev->fmr_page_shift; - srp_dev->fmr_page_mask = ~((unsigned long) srp_dev->fmr_page_size - 1); + srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1); INIT_LIST_HEAD(&srp_dev->dev_list); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index d4e35ef51374..2f3319c719a5 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -87,7 +87,7 @@ struct srp_device { struct ib_fmr_pool *fmr_pool; int fmr_page_shift; int fmr_page_size; - unsigned long fmr_page_mask; + u64 fmr_page_mask; }; struct srp_host { @@ -158,10 +158,11 @@ struct srp_target_port { struct completion done; int status; enum srp_target_state state; + int qp_in_error; }; struct srp_iu { - dma_addr_t dma; + u64 dma; void *buf; size_t size; enum dma_data_direction direction; |