From c6ba7c9ba43de1b57e9a53946e7ff988554c84ed Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:07 +0200 Subject: net/smc: add base infrastructure for SMC-D and ISM SMC supports two variants: SMC-R and SMC-D. For data transport, SMC-R uses RDMA devices, SMC-D uses so-called Internal Shared Memory (ISM) devices. An ISM device only allows shared memory communication between SMC instances on the same machine. For example, this allows virtual machines on the same host to communicate via SMC without RDMA devices. This patch adds the base infrastructure for SMC-D and ISM devices to the existing SMC code. It contains the following: * ISM driver interface: This interface allows an ISM driver to register ISM devices in SMC. In the process, the driver provides a set of device ops for each device. SMC uses these ops to execute SMC specific operations on or transfer data over the device. * Core SMC-D link group, connection, and buffer support: Link groups, SMC connections and SMC buffers (in smc_core) are extended to support SMC-D. * SMC type checks: Some type checks are added to prevent using SMC-R specific code for SMC-D and vice versa. To actually use SMC-D, additional changes to pnetid, CLC, CDC, etc. are required. These are added in follow-up patches. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. 
Miller --- net/smc/smc_core.h | 71 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 19 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 93cb3523bf50..cd9268a9570e 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -124,15 +124,28 @@ struct smc_buf_desc { void *cpu_addr; /* virtual address of buffer */ struct page *pages; int len; /* length of buffer */ - struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: memory region - * incl. rkey provided to peer - */ - u32 order; /* allocation order */ u32 used; /* currently used / unused */ u8 reused : 1; /* new created / reused */ u8 regerr : 1; /* err during registration */ + union { + struct { /* SMC-R */ + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; + /* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer + */ + u32 order; /* allocation order */ + }; + struct { /* SMC-D */ + unsigned short sba_idx; + /* SBA index number */ + u64 token; + /* DMB token number */ + dma_addr_t dma_addr; + /* DMA address */ + }; + }; }; struct smc_rtoken { /* address/key of remote RMB */ @@ -148,12 +161,10 @@ struct smc_rtoken { /* address/key of remote RMB */ * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) */ +struct smcd_dev; + struct smc_link_group { struct list_head list; - enum smc_lgr_role role; /* client or server */ - struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ - char peer_systemid[SMC_SYSTEMID_LEN]; - /* unique system_id of peer */ struct rb_root conns_all; /* connection tree */ rwlock_t conns_lock; /* protects conns_all */ unsigned int conns_num; /* current # of connections */ @@ -163,17 +174,35 @@ struct smc_link_group { rwlock_t sndbufs_lock; /* protects tx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ rwlock_t 
rmbs_lock; /* protects rx buffers */ - struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] - [SMC_LINKS_PER_LGR_MAX]; - /* remote addr/key pairs */ - unsigned long rtokens_used_mask[BITS_TO_LONGS( - SMC_RMBS_PER_LGR_MAX)]; - /* used rtoken elements */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ + + bool is_smcd; /* SMC-R or SMC-D */ + union { + struct { /* SMC-R */ + enum smc_lgr_role role; + /* client or server */ + struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; + /* smc link */ + char peer_systemid[SMC_SYSTEMID_LEN]; + /* unique system_id of peer */ + struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] + [SMC_LINKS_PER_LGR_MAX]; + /* remote addr/key pairs */ + unsigned long rtokens_used_mask[BITS_TO_LONGS + (SMC_RMBS_PER_LGR_MAX)]; + /* used rtoken elements */ + }; + struct { /* SMC-D */ + u64 peer_gid; + /* Peer GID (remote) */ + struct smcd_dev *smcd; + /* ISM device for VLAN reg. */ + }; + }; }; /* Find the connection associated with the given alert token in the link group. 
@@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); -int smc_buf_create(struct smc_sock *smc); +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid); +int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); @@ -227,9 +257,12 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); + void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact); + struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, + u64 peer_gid); +void smcd_conn_free(struct smc_connection *conn); void smc_core_exit(void); #endif -- cgit v1.2.3 From 413498440e30bfe381ac99dfc31628a3d8d4382a Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:11 +0200 Subject: net/smc: add SMC-D support in af_smc This patch ties together the previous SMC-D patches. It adds support for SMC-D to the listen and connect functions and, thus, enables SMC-D support in the SMC code. If a connection supports both SMC-R and SMC-D, SMC-D is preferred. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++----- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 1 + 3 files changed, 200 insertions(+), 19 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 20afa94be8bb..cbbb947dbfcf 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,7 @@ #include "smc_cdc.h" #include "smc_core.h" #include "smc_ib.h" +#include "smc_ism.h" #include "smc_pnet.h" #include "smc_tx.h" #include "smc_rx.h" @@ -372,8 +374,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) return 0; } -static void smc_conn_save_peer_info(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm *clc) +static void smcr_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) { int bufsize = smc_uncompress_bufsize(clc->rmbe_size); @@ -384,6 +386,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } +static void smcd_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + int bufsize = smc_uncompress_bufsize(clc->dmbe_size); + + smc->conn.peer_rmbe_idx = clc->dmbe_idx; + smc->conn.peer_token = clc->token; + /* msg header takes up space in the buffer */ + smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); + atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); + smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; +} + +static void smc_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + if (smc->conn.lgr->is_smcd) + smcd_conn_save_peer_info(smc, clc); + else + smcr_conn_save_peer_info(smc, clc); +} + static void smc_link_save_peer_info(struct smc_link *link, struct smc_clc_msg_accept_confirm *clc) { @@ -450,15 +474,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, 
return reason_code; } +/* check if there is an ISM device available for this connection. */ +/* called for connect and listen */ +static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev) +{ + /* Find ISM device with same PNETID as connecting interface */ + smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev); + if (!(*ismdev)) + return SMC_CLC_DECL_CNFERR; /* configuration error */ + return 0; +} + +/* Check for VLAN ID and register it on ISM device just for CLC handshake */ +static int smc_connect_ism_vlan_setup(struct smc_sock *smc, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + +/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is + * used, the VLAN ID will be registered again during the connection setup. + */ +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (!is_smcd) + return 0; + if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, - struct smc_ib_device *ibdev, u8 ibport) + struct smc_ib_device *ibdev, u8 ibport, + struct smcd_dev *ismdev) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, NULL); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -538,11 +598,50 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; } +/* setup for ISM connection of client */ +static int smc_connect_ism(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smcd_dev *ismdev) +{ + int local_contact = SMC_FIRST_CONTACT; + int rc = 0; + + mutex_lock(&smc_create_lgr_pending); + local_contact = smc_conn_create(smc, 
true, aclc->hdr.flag, NULL, 0, + NULL, ismdev, aclc->gid); + if (local_contact < 0) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + + /* Create send and receive buffers */ + if (smc_buf_create(smc, true)) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + + smc_conn_save_peer_info(smc, aclc); + smc_close_init(smc); + smc_rx_init(smc); + smc_tx_init(smc); + + rc = smc_clc_send_confirm(smc); + if (rc) + return smc_connect_abort(smc, rc, local_contact); + mutex_unlock(&smc_create_lgr_pending); + + smc_copy_sock_settings_to_clc(smc); + if (smc->sk.sk_state == SMC_INIT) + smc->sk.sk_state = SMC_ACTIVE; + + return 0; +} + /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { + bool ism_supported = false, rdma_supported = false; struct smc_clc_msg_accept_confirm aclc; struct smc_ib_device *ibdev; + struct smcd_dev *ismdev; + unsigned short vlan; + int smc_type; int rc = 0; u8 ibport; @@ -559,20 +658,52 @@ static int __smc_connect(struct smc_sock *smc) if (using_ipsec(smc)) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); - /* check if a RDMA device is available; if not, fall back */ - if (smc_check_rdma(smc, &ibdev, &ibport)) + /* check for VLAN ID */ + if (smc_vlan_by_tcpsk(smc->clcsock, &vlan)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + + /* check if there is an ism device available */ + if (!smc_check_ism(smc, &ismdev) && + !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) { + /* ISM is supported for this connection */ + ism_supported = true; + smc_type = SMC_TYPE_D; + } + + /* check if there is a rdma device available */ + if (!smc_check_rdma(smc, &ibdev, &ibport)) { + /* RDMA is supported for this connection */ + rdma_supported = true; + if (ism_supported) + smc_type = SMC_TYPE_B; /* both */ + else + smc_type = SMC_TYPE_R; /* only RDMA */ + } + + /* if neither ISM nor RDMA are supported, fallback */ + if (!rdma_supported && !ism_supported) return 
smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, SMC_TYPE_R, &aclc, ibdev, ibport); - if (rc) + rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev); + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } - /* connect using rdma */ - rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); - if (rc) + /* depending on previous steps, connect using rdma or ism */ + if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) + rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); + else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) + rc = smc_connect_ism(smc, &aclc, ismdev); + else + rc = SMC_CLC_DECL_CNFERR; + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return 0; } @@ -909,6 +1040,44 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, return 0; } +/* listen worker: initialize connection and buffers for SMC-D */ +static int smc_listen_ism_init(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smcd_dev *ismdev, + int *local_contact) +{ + struct smc_clc_msg_smcd *pclc_smcd; + + pclc_smcd = smc_get_clc_msg_smcd(pclc); + *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL, + ismdev, pclc_smcd->gid); + if (*local_contact < 0) { + if (*local_contact == -ENOMEM) + return SMC_CLC_DECL_MEM;/* insufficient memory*/ + return SMC_CLC_DECL_INTERR; /* other error */ + } + + /* Check if peer can be reached via ISM device */ + if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, + new_smc->conn.lgr->vlan_id, + new_smc->conn.lgr->smcd)) { + if (*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_CNFERR; + } + + /* Create send and receive buffers */ + if (smc_buf_create(new_smc, true)) { + if 
(*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_MEM; + } + + return 0; +} + /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) { @@ -967,6 +1136,8 @@ static void smc_listen_work(struct work_struct *work) struct smc_clc_msg_accept_confirm cclc; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *ibdev; + bool ism_supported = false; + struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; int local_contact = 0; int reason_code = 0; @@ -1007,13 +1178,21 @@ static void smc_listen_work(struct work_struct *work) smc_rx_init(new_smc); smc_tx_init(new_smc); + /* check if ISM is available */ + if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) && + !smc_check_ism(new_smc, &ismdev) && + !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) { + ism_supported = true; + } + /* check if RDMA is available */ - if ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || - smc_check_rdma(new_smc, &ibdev, &ibport) || - smc_listen_rdma_check(new_smc, pclc) || - smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, - &local_contact) || - smc_listen_rdma_reg(new_smc, local_contact)) { + if (!ism_supported && + ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || + smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_listen_rdma_check(new_smc, pclc) || + smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, + &local_contact) || + smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); @@ -1038,7 +1217,8 @@ static void smc_listen_work(struct work_struct *work) } /* finish worker */ - smc_listen_rdma_finish(new_smc, &cclc, local_contact); + if (!ism_supported) + smc_listen_rdma_finish(new_smc, &cclc, local_contact); smc_conn_save_peer_info(new_smc, &cclc); 
mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 434c028162a4..66741e61a3b0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -478,7 +478,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid) /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ -static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) { struct dst_entry *dst = sk_dst_get(clcsock->sk); struct net_device *ndev; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index cd9268a9570e..8b47e0168fc3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -257,6 +257,7 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id); void smc_conn_free(struct smc_connection *conn); int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, -- cgit v1.2.3 From 00e5fb263f9f5f2af60754b79b7dcec0d5e88154 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jul 2018 13:53:10 +0200 Subject: net/smc: add function to get link group from link Replace a frequently used construct with a more readable variant, reducing the code. It might also come in handy when we start to support more than a single link per link group. Signed-off-by: Stefan Raspl Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_cdc.c | 2 +- net/smc/smc_core.h | 5 +++++ net/smc/smc_ib.c | 3 +-- net/smc/smc_llc.c | 30 ++++++++---------------------- net/smc/smc_wr.c | 27 +++++---------------------- 5 files changed, 20 insertions(+), 47 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index f3a1497953ee..a7af2289cdff 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -365,7 +365,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) return; /* invalid message */ /* lookup connection */ - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + lgr = smc_get_lgr(link); read_lock_bh(&lgr->conns_lock); conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); read_unlock_bh(&lgr->conns_lock); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8b47e0168fc3..8807865483bb 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -266,4 +266,9 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, u64 peer_gid); void smcd_conn_free(struct smc_connection *conn); void smc_core_exit(void); + +static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) +{ + return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); +} #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 36de2fd76170..4706ab7092a9 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -112,8 +112,7 @@ int smc_ib_modify_qp_reset(struct smc_link *lnk) int smc_ib_ready_link(struct smc_link *lnk) { - struct smc_link_group *lgr = - container_of(lnk, struct smc_link_group, lnk[0]); + struct smc_link_group *lgr = smc_get_lgr(lnk); int rc = 0; rc = smc_ib_modify_qp_init(lnk); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 5800a6b43d83..b7944aa1ffc3 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -186,8 +186,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp) { - struct smc_link_group *lgr = 
container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); struct smc_llc_msg_confirm_link *confllc; struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; @@ -381,11 +380,9 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { - struct smc_link_group *lgr; + struct smc_link_group *lgr = smc_get_lgr(link); int conf_rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - /* RMBE eyecatchers are not supported */ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) conf_rc = 0; @@ -411,8 +408,7 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, static void smc_llc_rx_add_link(struct smc_link *link, struct smc_llc_msg_add_link *llc) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (link->state == SMC_LNK_ACTIVATING) @@ -442,8 +438,7 @@ static void smc_llc_rx_add_link(struct smc_link *link, static void smc_llc_rx_delete_link(struct smc_link *link, struct smc_llc_msg_del_link *llc) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (lgr->role == SMC_SERV) @@ -476,17 +471,14 @@ static void smc_llc_rx_test_link(struct smc_link *link, static void smc_llc_rx_confirm_rkey(struct smc_link *link, struct smc_llc_msg_confirm_rkey *llc) { - struct smc_link_group *lgr; int rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { link->llc_confirm_rkey_rc = llc->hd.flags & SMC_LLC_FLAG_RKEY_NEG; complete(&link->llc_confirm_rkey); } else { - rc = smc_rtoken_add(lgr, + rc = smc_rtoken_add(smc_get_lgr(link), 
llc->rtoken[0].rmb_vaddr, llc->rtoken[0].rmb_key); @@ -514,18 +506,15 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, static void smc_llc_rx_delete_rkey(struct smc_link *link, struct smc_llc_msg_delete_rkey *llc) { - struct smc_link_group *lgr; u8 err_mask = 0; int i, max; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { /* unused as long as we don't send this type of msg */ } else { max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { - if (smc_rtoken_delete(lgr, llc->rkey[i])) + if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i])) err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); } @@ -583,12 +572,10 @@ static void smc_llc_testlink_work(struct work_struct *work) struct smc_link *link = container_of(to_delayed_work(work), struct smc_link, llc_testlink_wrk); unsigned long next_interval; - struct smc_link_group *lgr; unsigned long expire_time; u8 user_data[16] = { 0 }; int rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); if (link->state != SMC_LNK_ACTIVE) return; /* don't reschedule worker */ expire_time = link->wr_rx_tstamp + link->llc_testlink_time; @@ -602,7 +589,7 @@ static void smc_llc_testlink_work(struct work_struct *work) rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); if (rc <= 0) { - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return; } next_interval = link->llc_testlink_time; @@ -613,8 +600,7 @@ out: int smc_llc_link_init(struct smc_link *link) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM, *((u32 *)lgr->id), link->link_id); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index dbd2605d1962..b6df69756bef 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -92,8 +92,6 @@ 
static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) return; if (wc->status) { - struct smc_link_group *lgr; - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { /* clear full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[i], 0, @@ -103,9 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) clear_bit(i, link->wr_tx_mask); } /* terminate connections of this link group abnormally */ - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); } if (pnd_snd.handler) pnd_snd.handler(&pnd_snd.priv, link, wc->status); @@ -188,8 +184,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { struct smc_link_group *lgr; - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + lgr = smc_get_lgr(link); rc = wait_event_timeout( link->wr_tx_wait, list_empty(&lgr->list) || /* lgr terminated */ @@ -250,12 +245,8 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); if (rc) { - struct smc_link_group *lgr = - container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_wr_tx_put_slot(link, priv); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); } return rc; } @@ -283,11 +274,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) SMC_WR_REG_MR_WAIT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return -EPIPE; } if (rc == -ERESTARTSYS) @@ -380,8 +367,6 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) smc_wr_rx_demultiplex(&wc[i]); smc_wr_rx_post(link); /* refill WR RX */ } else { - struct smc_link_group *lgr; - /* handle status errors 
*/ switch (wc[i].status) { case IB_WC_RETRY_EXC_ERR: @@ -390,9 +375,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) /* terminate connections of this link group * abnormally */ - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); break; default: smc_wr_rx_post(link); /* refill WR RX */ -- cgit v1.2.3 From 144ce4b9b5a788953b5373162a1921267497fb38 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 23 Jul 2018 13:53:11 +0200 Subject: net/smc: use DECLARE_BITMAP for rtokens_used_mask Link group field rtokens_used_mask is a bitmap. Use macro DECLARE_BITMAP for its definition. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8807865483bb..1e8974c50550 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -192,8 +192,7 @@ struct smc_link_group { struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] [SMC_LINKS_PER_LGR_MAX]; /* remote addr/key pairs */ - unsigned long rtokens_used_mask[BITS_TO_LONGS - (SMC_RMBS_PER_LGR_MAX)]; + DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); /* used rtoken elements */ }; struct { /* SMC-D */ -- cgit v1.2.3 From 7005ada68d1774d7c1109deaba0c2cd8e46f5091 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Jul 2018 16:35:31 +0200 Subject: net/smc: use correct vlan gid of RoCE device SMC code uses the base gid for VLAN traffic. The gids exchanged in the CLC handshake and the gid index used for the QP have to switch from the base gid to the appropriate vlan gid. When searching for a matching IB device port for a certain vlan device, it does not make sense to return an IB device port, which is not enabled for the used vlan_id. Add another check whether a vlan gid exists for a certain IB device port. 
Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 24 +++++++++++++----------- net/smc/smc_clc.c | 10 ++++------ net/smc/smc_clc.h | 2 +- net/smc/smc_core.c | 37 ++++--------------------------------- net/smc/smc_core.h | 5 +++-- net/smc/smc_diag.c | 2 +- net/smc/smc_ib.c | 41 ++++++++++++++++++++++++++++++++++++----- net/smc/smc_ib.h | 3 ++- net/smc/smc_llc.c | 15 +++++---------- net/smc/smc_llc.h | 2 +- net/smc/smc_pnet.c | 30 +++++++++++++++++++++--------- net/smc/smc_pnet.h | 3 ++- 12 files changed, 93 insertions(+), 81 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7883f70f7c6d..b81797103260 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -370,8 +370,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) /* send add link reject message, only one link supported for now */ rc = smc_llc_send_add_link(link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_RESP); + link->gid, SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; @@ -469,7 +468,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, /* check if there is a rdma device available for this connection. 
*/ /* called for connect and listen */ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, u8 gid[]) { int reason_code = 0; @@ -477,7 +476,8 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, * within same PNETID that also contains the ethernet device * used for the internal TCP socket */ - smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport); + smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id, + gid); if (!(*ibdev)) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ @@ -523,12 +523,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, struct smc_ib_device *ibdev, u8 ibport, - struct smcd_dev *ismdev) + u8 gid[], struct smcd_dev *ismdev) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -650,6 +650,7 @@ static int __smc_connect(struct smc_sock *smc) struct smc_clc_msg_accept_confirm aclc; struct smc_ib_device *ibdev; struct smcd_dev *ismdev; + u8 gid[SMC_GID_SIZE]; unsigned short vlan; int smc_type; int rc = 0; @@ -681,7 +682,7 @@ static int __smc_connect(struct smc_sock *smc) } /* check if there is a rdma device available */ - if (!smc_check_rdma(smc, &ibdev, &ibport)) { + if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) { /* RDMA is supported for this connection */ rdma_supported = true; if (ism_supported) @@ -695,7 +696,7 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev); + rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, 
ismdev); if (rc) { smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); @@ -970,8 +971,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) /* send ADD LINK request to client over the RoCE fabric */ rc = smc_llc_send_add_link(link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_REQ); + link->gid, SMC_LLC_REQ); if (rc < 0) return SMC_CLC_DECL_TCL; @@ -1193,6 +1193,7 @@ static void smc_listen_work(struct work_struct *work) struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; int local_contact = 0; + unsigned short vlan; int reason_code = 0; int rc = 0; u8 ibport; @@ -1241,7 +1242,8 @@ static void smc_listen_work(struct work_struct *work) /* check if RDMA is available */ if (!ism_supported && ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || - smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) || + smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) || smc_listen_rdma_check(new_smc, pclc) || smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, &local_contact) || diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ad39efdb4f1c..78d74938a9d9 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) /* send CLC PROPOSAL message across internal TCP socket */ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *ibdev, u8 ibport, + struct smc_ib_device *ibdev, u8 ibport, u8 gid[], struct smcd_dev *ismdev) { struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; @@ -409,7 +409,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, /* add SMC-R specifics */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE); + memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], 
ETH_ALEN); pclc.iparea_offset = htons(0); } @@ -492,8 +492,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE); memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); @@ -566,8 +565,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) link = &conn->lgr->lnk[SMC_SINGLE_LINK]; memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE); memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 100e988ad1a8..6bdc63352d6a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -179,7 +179,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *smcibdev, u8 ibport, + struct smc_ib_device *smcibdev, u8 ibport, u8 gid[], struct smcd_dev *ismdev); int smc_clc_send_confirm(struct smc_sock *smc); int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 66741e61a3b0..90c10ae9ae09 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -219,6 +219,10 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, get_random_bytes(rndvec, sizeof(rndvec)); lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); + rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, + vlan_id, lnk->gid, &lnk->sgid_index); + if (rc) + goto 
free_lgr; rc = smc_llc_link_init(lnk); if (rc) goto free_lgr; @@ -522,37 +526,6 @@ out: return rc; } -/* determine the link gid matching the vlan id of the link group */ -static int smc_link_determine_gid(struct smc_link_group *lgr) -{ - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - struct ib_gid_attr gattr; - union ib_gid gid; - int i; - - if (!lgr->vlan_id) { - lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; - return 0; - } - - for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; - i++) { - if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, - &gattr)) - continue; - if (gattr.ndev) { - if (is_vlan_dev(gattr.ndev) && - vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { - lnk->gid = gid; - dev_put(gattr.ndev); - return 0; - } - dev_put(gattr.ndev); - } - } - return -ENODEV; -} - static bool smcr_lgr_match(struct smc_link_group *lgr, struct smc_clc_msg_local *lcl, enum smc_lgr_role role) @@ -631,8 +604,6 @@ create: if (rc) goto out; smc_lgr_register_conn(conn); /* add smc conn to lgr */ - if (!is_smcd) - rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 1e8974c50550..a4f0cc4e0270 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -84,14 +84,15 @@ struct smc_link { wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ - union ib_gid gid; /* gid matching used vlan id */ + u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ + u8 sgid_index; /* gid index for vlan id */ u32 peer_qpn; /* QP number of peer */ enum ib_mtu path_mtu; /* used mtu */ enum ib_mtu peer_mtu; /* mtu size of peer */ u32 psn_initial; /* QP tx initial packet seqno */ u32 peer_psn; /* QP rx initial packet seqno */ u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ - u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ + u8 peer_gid[SMC_GID_SIZE]; /* gid of 
peer*/ u8 link_id; /* unique # within link group */ enum smc_link_state state; /* state of link */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d772cd10297e..a3cf7313a2d3 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -154,7 +154,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, - smc->conn.lgr->lnk[0].gid.raw); + smc->conn.lgr->lnk[0].gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, smc->conn.lgr->lnk[0].peer_gid); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4706ab7092a9..2cc64bc8ae20 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -68,7 +68,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); - rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, sizeof(lnk->peer_mac)); @@ -142,13 +142,13 @@ out: return rc; } -static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) +static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { struct ib_gid_attr gattr; + union ib_gid gid; int rc; - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); + rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr); if (rc || !gattr.ndev) return -ENODEV; @@ -175,6 +175,37 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } +/* determine the gid for an ib-device port and vlan id */ +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, + unsigned short vlan_id, u8 gid[], u8 *sgid_index) +{ + struct 
ib_gid_attr gattr; + union ib_gid _gid; + int i; + + for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { + memset(&_gid, 0, SMC_GID_SIZE); + memset(&gattr, 0, sizeof(gattr)); + if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr)) + continue; + if (!gattr.ndev) + continue; + if (((!vlan_id && !is_vlan_dev(gattr.ndev)) || + (vlan_id && is_vlan_dev(gattr.ndev) && + vlan_dev_vlan_id(gattr.ndev) == vlan_id)) && + gattr.gid_type == IB_GID_TYPE_IB) { + if (gid) + memcpy(gid, &_gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = i; + dev_put(gattr.ndev); + return 0; + } + dev_put(gattr.ndev); + } + return -ENODEV; +} + static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) { int rc; @@ -186,7 +217,7 @@ static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) if (rc) goto out; /* the SMC protocol requires specification of the RoCE MAC address */ - rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); + rc = smc_ib_fill_mac(smcibdev, ibport); if (rc) goto out; if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 7c1223c91229..bac7fd65a4c0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -40,7 +40,6 @@ struct smc_ib_device { /* ib-device infos for smc */ struct tasklet_struct recv_tasklet; /* called by recv cq handler */ char mac[SMC_MAX_PORTS][ETH_ALEN]; /* mac address per port*/ - union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */ u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; /* pnetid per port */ u8 initialized : 1; /* ib dev CQ, evthdl done */ @@ -77,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, + unsigned short vlan_id, u8 gid[], u8 *sgid_index); #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 
f2ba99c2e69a..a88c01029fa6 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -203,8 +203,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, confllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - memcpy(confllc->sender_gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); @@ -241,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link, /* prepare an add link message */ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, - struct smc_link *link, u8 mac[], - union ib_gid *gid, + struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp) { memset(addllc, 0, sizeof(*addllc)); @@ -259,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, } /* send ADD LINK request or response */ -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], - union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp) { struct smc_llc_msg_add_link *addllc; @@ -423,14 +420,12 @@ static void smc_llc_rx_add_link(struct smc_link *link, if (lgr->role == SMC_SERV) { smc_llc_prep_add_link(llc, link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_REQ); + link->gid, SMC_LLC_REQ); } else { smc_llc_prep_add_link(llc, link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_RESP); + link->gid, SMC_LLC_RESP); } smc_llc_send_message(link, llc, sizeof(*llc)); } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 9a29fcbbcea8..95a7f3662e59 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -38,7 +38,7 @@ enum smc_llc_msg_type { /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, 
enum smc_llc_reqresp reqresp); -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp); int smc_llc_send_delete_link(struct smc_link *link, enum smc_llc_reqresp reqresp); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 1b6c066d3495..01c6ce042a1c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -535,11 +535,13 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) } /* Determine the corresponding IB device port based on the hardware PNETID. - * Searching stops at the first matching active IB device port. + * Searching stops at the first matching active IB device port with vlan_id + * configured. */ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_ib_device **smcibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, + u8 gid[]) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smc_ib_device *ibdev; @@ -553,15 +555,20 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(ibdev->ibdev, i)) + continue; if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, SMC_MAX_PNETID_LEN) && - smc_ib_port_active(ibdev, i)) { + smc_ib_port_active(ibdev, i) && + !smc_ib_determine_gid(ibdev, i, vlan_id, gid, + NULL)) { *smcibdev = ibdev; *ibport = i; - break; + goto out; } } } +out: spin_unlock(&smc_ib_devices.lock); } @@ -589,7 +596,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, /* Lookup of coupled ib_device via SMC pnet table */ static void smc_pnet_find_roce_by_table(struct net_device *netdev, struct smc_ib_device **smcibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, + u8 gid[]) { struct smc_pnetentry *pnetelem; @@ -597,7 +605,10 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev, 
list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { if (netdev == pnetelem->ndev) { if (smc_ib_port_active(pnetelem->smcibdev, - pnetelem->ib_port)) { + pnetelem->ib_port) && + !smc_ib_determine_gid(pnetelem->smcibdev, + pnetelem->ib_port, vlan_id, + gid, NULL)) { *smcibdev = pnetelem->smcibdev; *ibport = pnetelem->ib_port; } @@ -612,7 +623,8 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev, * ethernet interface. */ void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport) + struct smc_ib_device **smcibdev, u8 *ibport, + unsigned short vlan_id, u8 gid[]) { struct dst_entry *dst = sk_dst_get(sk); @@ -625,12 +637,12 @@ void smc_pnet_find_roce_resource(struct sock *sk, goto out_rel; /* if possible, lookup via hardware-defined pnetid */ - smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport); + smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); if (*smcibdev) goto out_rel; /* lookup via SMC PNET table */ - smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport); + smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); out_rel: dst_release(dst); diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 1e94fd4df7bc..8ff777636e32 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -33,7 +33,8 @@ int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport); + struct smc_ib_device **smcibdev, u8 *ibport, + unsigned short vlan_id, u8 gid[]); void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev); #endif -- cgit v1.2.3 From 0d18a0cb4b1585d9e5a3b300d5df9ed866561ffb Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 25 Jul 2018 16:35:33 +0200 Subject: net/smc: improve delete link processing Send an orderly DELETE LINK request before termination of a link group, add support for 
client triggered DELETE LINK processing. And send a disorderly DELETE LINK before module is unloaded. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- net/smc/smc_core.h | 4 +++- net/smc/smc_llc.c | 30 +++++++++++++++++++----------- net/smc/smc_llc.h | 3 ++- net/smc/smc_wr.c | 7 ++----- 5 files changed, 68 insertions(+), 23 deletions(-) (limited to 'net/smc/smc_core.h') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 90c10ae9ae09..a46418f45ecd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -30,6 +30,7 @@ #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) +#define SMC_LGR_FREE_DELAY_FAST (8 * HZ) static struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), @@ -51,6 +52,11 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); } +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) +{ + mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST); +} + /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. * Requires @conns_lock @@ -133,6 +139,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) smc_lgr_schedule_free_work(lgr); } +/* Send delete link, either as client to request the initiation + * of the DELETE LINK sequence from server; or as server to + * initiate the delete processing. See smc_llc_rx_delete_link(). 
+ */ +static int smc_link_send_delete(struct smc_link *lnk) +{ + if (lnk->state == SMC_LNK_ACTIVE && + !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) { + smc_llc_link_deleting(lnk); + return 0; + } + return -ENOTCONN; +} + static void smc_lgr_free_work(struct work_struct *work) { struct smc_link_group *lgr = container_of(to_delayed_work(work), @@ -153,10 +173,21 @@ static void smc_lgr_free_work(struct work_struct *work) list_del_init(&lgr->list); /* remove from smc_lgr_list */ free: spin_unlock_bh(&smc_lgr_list.lock); + + if (!lgr->is_smcd && !lgr->terminating) { + /* try to send del link msg, on error free lgr immediately */ + if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) { + /* reschedule in case we never receive a response */ + smc_lgr_schedule_free_work(lgr); + return; + } + } + if (!delayed_work_pending(&lgr->free_work)) { - if (!lgr->is_smcd && - lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + + if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) + smc_llc_link_inactive(lnk); smc_lgr_free(lgr); } } @@ -984,8 +1015,14 @@ void smc_core_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); - if (!lgr->is_smcd) - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) { + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + + if (lnk->state == SMC_LNK_ACTIVE) + smc_llc_send_delete_link(lnk, SMC_LLC_REQ, + false); + smc_llc_link_inactive(lnk); + } cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index a4f0cc4e0270..c156674733c9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -34,7 +34,8 @@ enum smc_lgr_role { /* possible roles of a link group */ enum smc_link_state { /* possible states of a link */ SMC_LNK_INACTIVE, /* link is inactive */ 
SMC_LNK_ACTIVATING, /* link is being activated */ - SMC_LNK_ACTIVE /* link is active */ + SMC_LNK_ACTIVE, /* link is active */ + SMC_LNK_DELETING, /* link is being deleted */ }; #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ @@ -265,6 +266,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, u64 peer_gid); void smcd_conn_free(struct smc_connection *conn); +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); void smc_core_exit(void); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a88c01029fa6..9c916c709ca7 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -278,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], /* prepare a delete link message */ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, struct smc_link *link, - enum smc_llc_reqresp reqresp) + enum smc_llc_reqresp reqresp, bool orderly) { memset(delllc, 0, sizeof(*delllc)); delllc->hd.common.type = SMC_LLC_DELETE_LINK; @@ -287,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, delllc->hd.flags |= SMC_LLC_FLAG_RESP; /* DEL_LINK_ALL because only 1 link supported */ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; - delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + if (orderly) + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; delllc->link_num = link->link_id; } /* send DELETE LINK request or response */ int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp) + enum smc_llc_reqresp reqresp, bool orderly) { struct smc_llc_msg_del_link *delllc; struct smc_wr_tx_pend_priv *pend; @@ -304,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link, if (rc) return rc; delllc = (struct smc_llc_msg_del_link *)wr_buf; - smc_llc_prep_delete_link(delllc, link, reqresp); + 
smc_llc_prep_delete_link(delllc, link, reqresp, orderly); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -438,17 +439,19 @@ static void smc_llc_rx_delete_link(struct smc_link *link, if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (lgr->role == SMC_SERV) - smc_lgr_terminate(lgr); + smc_lgr_schedule_free_work_fast(lgr); } else { + smc_lgr_forget(lgr); + smc_llc_link_deleting(link); if (lgr->role == SMC_SERV) { - smc_lgr_forget(lgr); - smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ); - smc_llc_send_message(link, llc, sizeof(*llc)); + /* client asks to delete this link, send request */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true); } else { - smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP); - smc_llc_send_message(link, llc, sizeof(*llc)); - smc_lgr_terminate(lgr); + /* server requests to delete this link, send response */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); } + smc_llc_send_message(link, llc, sizeof(*llc)); + smc_lgr_schedule_free_work_fast(lgr); } } @@ -622,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time) } } +void smc_llc_link_deleting(struct smc_link *link) +{ + link->state = SMC_LNK_DELETING; +} + /* called in tasklet context */ void smc_llc_link_inactive(struct smc_link *link) { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 95a7f3662e59..9e2ff088e301 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -41,9 +41,10 @@ int smc_llc_send_confirm_link(struct smc_link *lnk, int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp); int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp); + enum smc_llc_reqresp reqresp, bool orderly); int smc_llc_link_init(struct smc_link *link); void smc_llc_link_active(struct smc_link *link, int testlink_time); +void smc_llc_link_deleting(struct smc_link *link); void smc_llc_link_inactive(struct smc_link *link); void smc_llc_link_clear(struct smc_link *link); int 
smc_llc_do_confirm_rkey(struct smc_link *link, diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b6df69756bef..f856b8402b3f 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -182,17 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - struct smc_link_group *lgr; - - lgr = smc_get_lgr(link); rc = wait_event_timeout( link->wr_tx_wait, - list_empty(&lgr->list) || /* lgr terminated */ + link->state == SMC_LNK_INACTIVE || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return -EPIPE; } if (idx == link->wr_tx_cnt) -- cgit v1.2.3