From 6b73597e7062118c0549c2702bfb7d273518c906 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 26 Sep 2005 13:53:25 -0700 Subject: [IB] uverbs: ABI-breaking fixes for userspace verbs Introduce new userspace verbs ABI version 3. This eliminates some unneeded commands, and adds support for user-created completion channels. This cleans up problems with file leaks on error paths, and also makes sure that file descriptors are always installed into the correct process. Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 58 +++++++++++--------------------------------- 1 file changed, 14 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fd85725391a4..0532b78200ca 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -42,15 +42,12 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 2 +#define IB_USER_VERBS_ABI_VERSION 3 enum { - IB_USER_VERBS_CMD_QUERY_PARAMS, IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_QUERY_GID, - IB_USER_VERBS_CMD_QUERY_PKEY, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, @@ -65,6 +62,7 @@ enum { IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, @@ -118,27 +116,14 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; -/* - * No driver_data for "query params" command, since this is intended - * to be a core function with no possible device dependence. - */ -struct ib_uverbs_query_params { - __u64 response; -}; - -struct ib_uverbs_query_params_resp { - __u32 num_cq_events; -}; - struct ib_uverbs_get_context { __u64 response; - __u64 cq_fd_tab; __u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { __u32 async_fd; - __u32 reserved; + __u32 num_comp_vectors; }; struct ib_uverbs_query_device { @@ -220,31 +205,6 @@ struct ib_uverbs_query_port_resp { __u8 reserved[3]; }; -struct ib_uverbs_query_gid { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_gid_resp { - __u8 gid[16]; -}; - -struct ib_uverbs_query_pkey { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_pkey_resp { - __u16 pkey; - __u16 reserved; -}; - struct ib_uverbs_alloc_pd { __u64 response; __u64 driver_data[0]; @@ -278,11 +238,21 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + struct ib_uverbs_create_cq { __u64 response; __u64 user_handle; __u32 cqe; - __u32 event_handler; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; __u64 driver_data[0]; }; -- cgit v1.2.1 From 274c0891637c44ac71f3ac40be91b43c2318883a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 29 Sep 2005 14:17:48 -0700 Subject: [IB] uverbs: Add device-specific ABI version attribute Add abi_version attribute to uverbs class devices to allow for ABI versioning of device-specific interfaces. Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6f4c9e55df7..d13f25a78192 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -951,6 +951,8 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; + int uverbs_abi_ver; + u8 node_type; u8 phys_port_cnt; }; -- cgit v1.2.1 From 883a99c7024c5763d6d4f22d9239c133893e8d74 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 14 Oct 2005 14:00:58 -0700 Subject: [IB] uverbs: Add a mask of device methods allowed for userspace Give each device a uverbs_cmd_mask, so that a low-level driver can control which methods may be called on behalf of userspace. Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d13f25a78192..a5a963cb5676 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -951,6 +951,7 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; + u64 uverbs_cmd_mask; int uverbs_abi_ver; u8 node_type; -- cgit v1.2.1 From 91ecd4ae178bed83da4f6a94ced7992e4d7711eb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 14 Oct 2005 15:21:44 -0700 Subject: [IB] uverbs: Add ABI structures for more commands Add kernel/user ABI structures for marshalling poll CQ, request CQ notification, post send, post receive, post SRQ receive, create AH and destroy AH commands. These commands allow us to support userspace verbs for devices that can't perform these operations directly from userspace (eg the PathScale HCA). Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 164 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 0532b78200ca..072f3a2edace 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -88,8 +89,11 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. */ struct ib_uverbs_async_event_desc { @@ -261,6 +265,41 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + __u32 imm_data; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + struct ib_uverbs_destroy_cq { __u64 response; __u32 cq_handle; @@ -358,6 +397,127 @@ struct ib_uverbs_destroy_qp_resp { __u32 events_reported; }; +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. + */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + __u32 imm_data; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + struct ib_uverbs_attach_mcast { __u8 gid[16]; __u32 qp_handle; -- cgit v1.2.1 From 07d357d0cbf89d9980b1769d5444a3c70f000e00 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 17 Oct 2005 15:37:43 -0700 Subject: [IB] CM: bind IDs to a specific device Bind communication identifiers to a device to support device removal. Export per HCA CM devices to userspace. Signed-off-by: Sean Hefty --- include/rdma/ib_cm.h | 10 ++++++---- include/rdma/ib_user_cm.h | 10 ++++------ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5308683c8c41..0a9fcd59eb43 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -109,7 +109,6 @@ struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; struct ib_sa_path_rec *primary_path; @@ -220,7 +219,6 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; u16 pkey; }; @@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_id { ib_cm_handler cm_handler; void *context; + struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ @@ -295,6 +294,8 @@ struct ib_cm_id { /** * ib_create_cm_id - Allocate a communication identifier. + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. @@ -302,7 +303,8 @@ struct ib_cm_id { * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context); /** diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index e4d1654276ad..3037588b8464 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -38,7 +38,7 @@ #include -#define IB_USER_CM_ABI_VERSION 2 +#define IB_USER_CM_ABI_VERSION 3 enum { IB_USER_CM_CMD_CREATE_ID, @@ -299,8 +299,6 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { - /* device */ - /* port */ struct ib_ucm_path_rec primary_path; struct ib_ucm_path_rec alternate_path; __be64 remote_ca_guid; @@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp { __u8 retry_count; __u8 rnr_retry_count; __u8 srq; + __u8 port; }; struct ib_ucm_rep_event_resp { @@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp { }; struct ib_ucm_sidr_req_event_resp { - /* device */ - /* port */ __u16 pkey; - __u8 reserved[2]; + __u8 port; + __u8 reserved; }; struct ib_ucm_sidr_rep_event_resp { -- cgit v1.2.1 From 34816ad98efe4d47ffd858a0345321f9d85d9420 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 25 Oct 2005 10:51:39 -0700 Subject: [IB] Fix MAD layer DMA mappings to avoid touching data buffer once mapped The MAD layer was violating the DMA API by touching data buffers used for sends after the DMA mapping was done. This causes problems on non-cache-coherent architectures, because the device doing DMA won't see updates to the payload buffers that exist only in the CPU cache. Fix this by having all MAD consumers use ib_create_send_mad() to allocate their send buffers, and moving the DMA mapping into the MAD layer so it can be done just before calling send (and after any modifications of the send buffer by the MAD layer). Tested on a non-cache-coherent PowerPC 440SPe system. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_mad.h | 66 +++++++++++++++++++++++++------------------------ include/rdma/ib_verbs.h | 3 --- 2 files changed, 34 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 4172e6841e3d..2c133506742b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -109,10 +109,14 @@ #define IB_QP_SET_QKEY 0x80000000 enum { + IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, - IB_MGMT_SA_DATA = 200 + IB_MGMT_SA_HDR = 56, + IB_MGMT_SA_DATA = 200, }; struct ib_mad_hdr { @@ -203,26 +207,25 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. - * @mad: References an allocated MAD data buffer. The size of the data - * buffer is specified in the @send_wr.length field. - * @mapping: DMA mapping information. + * @next: A pointer used to chain together MADs for posting. + * @mad: References an allocated MAD data buffer. * @mad_agent: MAD agent that allocated the buffer. + * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. - * @send_wr: An initialized work request structure used when sending the MAD. - * The wr_id field of the work request is initialized to reference this - * data structure. - * @sge: A scatter-gather list referenced by the work request. + * @timeout_ms: Time to wait for a response. + * @retries: Number of times to retry a request for a response. * * Users are responsible for initializing the MAD buffer itself, with the * exception of specifying the payload length field in any RMPP MAD. */ struct ib_mad_send_buf { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_mad_send_buf *next; + void *mad; struct ib_mad_agent *mad_agent; + struct ib_ah *ah; void *context[2]; - struct ib_send_wr send_wr; - struct ib_sge sge; + int timeout_ms; + int retries; }; /** @@ -287,7 +290,7 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc); /** @@ -334,13 +337,13 @@ struct ib_mad_agent { /** * ib_mad_send_wc - MAD send completion information. - * @wr_id: Work request identifier associated with the send MAD request. + * @send_buf: Send MAD data buffer associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { - u64 wr_id; + struct ib_mad_send_buf *send_buf; enum ib_wc_status status; u32 vendor_err; }; @@ -366,7 +369,7 @@ struct ib_mad_recv_buf { * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * - * For received response, the wr_id field of the wc is set to the wr_id + * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. */ struct ib_mad_recv_wc { @@ -463,9 +466,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. - * @mad_agent: Specifies the associated registration to post the send to. - * @send_wr: Specifies the information needed to send the MAD(s). - * @bad_send_wr: Specifies the MAD on which an error was encountered. + * @send_buf: Specifies the information needed to send the MAD(s). + * @bad_send_buf: Specifies the MAD on which an error was encountered. This + * parameter is optional if only a single MAD is posted. * * Sent MADs are not guaranteed to complete in the order that they were posted. * @@ -479,9 +482,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf); /** * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. @@ -507,23 +509,25 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to cancel. + * @send_buf: Indicates the MAD to cancel. * * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to modify. + * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. @@ -572,7 +576,6 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. - * @ah: References the address handle used to transfer to the remote node. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class @@ -582,11 +585,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * - * This is a helper routine that may be used to allocate a MAD. Users are - * not required to allocate outbound MADs using this call. The returned - * MAD send buffer will reference a data buffer usable for sending a MAD, along + * This routine allocates a MAD for sending. The returned MAD send buffer + * will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned - * MAD data buffer or work request before posting the send. + * MAD data buffer before posting the send. * * The returned data buffer will be cleared. Users are responsible for * initializing the common MAD and any class specific headers. If @rmpp_active @@ -594,7 +596,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a5a963cb5676..f72d46d54e0a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -595,11 +595,8 @@ struct ib_send_wr { } atomic; struct { struct ib_ah *ah; - struct ib_mad_hdr *mad_hdr; u32 remote_qpn; u32 remote_qkey; - int timeout_ms; /* valid for MADs only */ - int retries; /* valid for MADs only */ u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; -- cgit v1.2.1