diff options
Diffstat (limited to 'net/rds')
| -rw-r--r-- | net/rds/ib.c | 10 | ||||
| -rw-r--r-- | net/rds/ib.h | 15 | ||||
| -rw-r--r-- | net/rds/ib_cm.c | 190 | ||||
| -rw-r--r-- | net/rds/ib_recv.c | 13 | ||||
| -rw-r--r-- | net/rds/ib_send.c | 19 | 
5 files changed, 174 insertions, 73 deletions
| diff --git a/net/rds/ib.c b/net/rds/ib.c index 9de2ae22d583..3fd5f40189bd 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@   * SOFTWARE.   *   */ +#include <linux/dmapool.h>  #include <linux/kernel.h>  #include <linux/in.h>  #include <linux/if.h> @@ -107,6 +108,7 @@ static void rds_ib_dev_free(struct work_struct *work)  		rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);  	if (rds_ibdev->pd)  		ib_dealloc_pd(rds_ibdev->pd); +	dma_pool_destroy(rds_ibdev->rid_hdrs_pool);  	list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {  		list_del(&i_ipaddr->list); @@ -182,6 +184,12 @@ static void rds_ib_add_one(struct ib_device *device)  		rds_ibdev->pd = NULL;  		goto put_dev;  	} +	rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, +						   device->dma_device, +						   sizeof(struct rds_header), +						   L1_CACHE_BYTES, 0); +	if (!rds_ibdev->rid_hdrs_pool) +		goto put_dev;  	rds_ibdev->mr_1m_pool =  		rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index f2b558e8b5ea..6e6f24753998 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -165,8 +165,8 @@ struct rds_ib_connection {  	/* tx */  	struct rds_ib_work_ring	i_send_ring;  	struct rm_data_op	*i_data_op; -	struct rds_header	*i_send_hdrs; -	dma_addr_t		i_send_hdrs_dma; +	struct rds_header	**i_send_hdrs; +	dma_addr_t		*i_send_hdrs_dma;  	struct rds_ib_send_work *i_sends;  	atomic_t		i_signaled_sends; @@ -175,8 +175,8 @@ struct rds_ib_connection {  	struct rds_ib_work_ring	i_recv_ring;  	struct rds_ib_incoming	*i_ibinc;  	u32			i_recv_data_rem; -	struct rds_header	*i_recv_hdrs; -	dma_addr_t		i_recv_hdrs_dma; +	struct 
rds_header	**i_recv_hdrs; +	dma_addr_t		*i_recv_hdrs_dma;  	struct rds_ib_recv_work *i_recvs;  	u64			i_ack_recv;	/* last ACK received */  	struct rds_ib_refill_cache i_cache_incs; @@ -246,6 +246,7 @@ struct rds_ib_device {  	struct list_head	conn_list;  	struct ib_device	*dev;  	struct ib_pd		*pd; +	struct dma_pool		*rid_hdrs_pool; /* RDS headers DMA pool */  	bool                    use_fastreg;  	unsigned int		max_mrs; @@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,  int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);  void rds_ib_cm_connect_complete(struct rds_connection *conn,  				struct rdma_cm_event *event); - +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, +				       struct dma_pool *pool, +				       dma_addr_t **dma_addrs, u32 num_hdrs); +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, +		       dma_addr_t *dma_addrs, u32 num_hdrs);  #define rds_ib_conn_error(conn, fmt...) \  	__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 233f1368162b..c71f4328d138 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -30,6 +30,7 @@   * SOFTWARE.   *   */ +#include <linux/dmapool.h>  #include <linux/kernel.h>  #include <linux/in.h>  #include <linux/slab.h> @@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)  	rds_ibdev->vector_load[index]--;  } +/* Allocate DMA coherent memory to be used to store struct rds_header for + * sending/receiving packets.  The pointers to the DMA memory and the + * associated DMA addresses are stored in two arrays. 
+ * + * @ibdev: the IB device + * @pool: the DMA memory pool + * @dma_addrs: pointer to the array for storing DMA addresses + * @num_hdrs: number of headers to allocate + * + * It returns the pointer to the array storing the DMA memory pointers.  On + * error, NULL pointer is returned. + */ +struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, +				       struct dma_pool *pool, +				       dma_addr_t **dma_addrs, u32 num_hdrs) +{ +	struct rds_header **hdrs; +	dma_addr_t *hdr_daddrs; +	u32 i; + +	hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, +			     ibdev_to_node(ibdev)); +	if (!hdrs) +		return NULL; + +	hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, +				   ibdev_to_node(ibdev)); +	if (!hdr_daddrs) { +		kvfree(hdrs); +		return NULL; +	} + +	for (i = 0; i < num_hdrs; i++) { +		hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); +		if (!hdrs[i]) { +			rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); +			return NULL; +		} +	} + +	*dma_addrs = hdr_daddrs; +	return hdrs; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @pool: the DMA memory pool + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdrs: number of headers to free. + */ +void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, +		       dma_addr_t *dma_addrs, u32 num_hdrs) +{ +	u32 i; + +	for (i = 0; i < num_hdrs; i++) +		dma_pool_free(pool, hdrs[i], dma_addrs[i]); +	kvfree(hdrs); +	kvfree(dma_addrs); +} +  /*   * This needs to be very careful to not leave IS_ERR pointers around for   * cleanup to trip over. 
@@ -450,7 +513,9 @@ static int rds_ib_setup_qp(struct rds_connection *conn)  	struct ib_qp_init_attr attr;  	struct ib_cq_init_attr cq_attr = {};  	struct rds_ib_device *rds_ibdev; +	unsigned long max_wrs;  	int ret, fr_queue_space; +	struct dma_pool *pool;  	/*  	 * It's normal to see a null device if an incoming connection races @@ -469,10 +534,15 @@ static int rds_ib_setup_qp(struct rds_connection *conn)  	/* add the conn now so that connection establishment has the dev */  	rds_ib_add_conn(rds_ibdev, conn); -	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) -		rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); -	if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) -		rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); +	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ? +		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr; +	if (ic->i_send_ring.w_nr != max_wrs) +		rds_ib_ring_resize(&ic->i_send_ring, max_wrs); + +	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ? 
+		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr; +	if (ic->i_recv_ring.w_nr != max_wrs) +		rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);  	/* Protection domain and memory range */  	ic->i_pd = rds_ibdev->pd; @@ -541,31 +611,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)  		goto recv_cq_out;  	} -	ic->i_send_hdrs = ib_dma_alloc_coherent(dev, -					   ic->i_send_ring.w_nr * -						sizeof(struct rds_header), -					   &ic->i_send_hdrs_dma, GFP_KERNEL); +	pool = rds_ibdev->rid_hdrs_pool; +	ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, +					     ic->i_send_ring.w_nr);  	if (!ic->i_send_hdrs) {  		ret = -ENOMEM; -		rdsdebug("ib_dma_alloc_coherent send failed\n"); +		rdsdebug("DMA send hdrs alloc failed\n");  		goto qp_out;  	} -	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, -					   ic->i_recv_ring.w_nr * -						sizeof(struct rds_header), -					   &ic->i_recv_hdrs_dma, GFP_KERNEL); +	ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, +					     ic->i_recv_ring.w_nr);  	if (!ic->i_recv_hdrs) {  		ret = -ENOMEM; -		rdsdebug("ib_dma_alloc_coherent recv failed\n"); +		rdsdebug("DMA recv hdrs alloc failed\n");  		goto send_hdrs_dma_out;  	} -	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), -				       &ic->i_ack_dma, GFP_KERNEL); +	ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, +				    &ic->i_ack_dma);  	if (!ic->i_ack) {  		ret = -ENOMEM; -		rdsdebug("ib_dma_alloc_coherent ack failed\n"); +		rdsdebug("DMA ack header alloc failed\n");  		goto recv_hdrs_dma_out;  	} @@ -596,17 +663,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)  sends_out:  	vfree(ic->i_sends); +  ack_dma_out: -	ib_dma_free_coherent(dev, sizeof(struct rds_header), -			     ic->i_ack, ic->i_ack_dma); +	dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); +	ic->i_ack = NULL; +  recv_hdrs_dma_out: -	ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * -					sizeof(struct rds_header), -					ic->i_recv_hdrs, ic->i_recv_hdrs_dma); 
+	rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, +			  ic->i_recv_ring.w_nr); +	ic->i_recv_hdrs = NULL; +	ic->i_recv_hdrs_dma = NULL; +  send_hdrs_dma_out: -	ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * -					sizeof(struct rds_header), -					ic->i_send_hdrs, ic->i_send_hdrs_dma); +	rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, +			  ic->i_send_ring.w_nr); +	ic->i_send_hdrs = NULL; +	ic->i_send_hdrs_dma = NULL; +  qp_out:  	rdma_destroy_qp(ic->i_cm_id);  recv_cq_out: @@ -984,8 +1057,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)  		 ic->i_cm_id ? ic->i_cm_id->qp : NULL);  	if (ic->i_cm_id) { -		struct ib_device *dev = ic->i_cm_id->device; -  		rdsdebug("disconnecting cm %p\n", ic->i_cm_id);  		err = rdma_disconnect(ic->i_cm_id);  		if (err) { @@ -1035,24 +1106,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)  			ib_destroy_cq(ic->i_recv_cq);  		} -		/* then free the resources that ib callbacks use */ -		if (ic->i_send_hdrs) -			ib_dma_free_coherent(dev, -					   ic->i_send_ring.w_nr * -						sizeof(struct rds_header), -					   ic->i_send_hdrs, -					   ic->i_send_hdrs_dma); - -		if (ic->i_recv_hdrs) -			ib_dma_free_coherent(dev, -					   ic->i_recv_ring.w_nr * -						sizeof(struct rds_header), -					   ic->i_recv_hdrs, -					   ic->i_recv_hdrs_dma); - -		if (ic->i_ack) -			ib_dma_free_coherent(dev, sizeof(struct rds_header), -					     ic->i_ack, ic->i_ack_dma); +		if (ic->rds_ibdev) { +			struct dma_pool *pool; + +			pool = ic->rds_ibdev->rid_hdrs_pool; + +			/* then free the resources that ib callbacks use */ +			if (ic->i_send_hdrs) { +				rds_dma_hdrs_free(pool, ic->i_send_hdrs, +						  ic->i_send_hdrs_dma, +						  ic->i_send_ring.w_nr); +				ic->i_send_hdrs = NULL; +				ic->i_send_hdrs_dma = NULL; +			} + +			if (ic->i_recv_hdrs) { +				rds_dma_hdrs_free(pool, ic->i_recv_hdrs, +						  ic->i_recv_hdrs_dma, +						  ic->i_recv_ring.w_nr); +				ic->i_recv_hdrs = NULL; +				ic->i_recv_hdrs_dma = 
NULL; +			} + +			if (ic->i_ack) { +				dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); +				ic->i_ack = NULL; +			} +		} else { +			WARN_ON(ic->i_send_hdrs); +			WARN_ON(ic->i_send_hdrs_dma); +			WARN_ON(ic->i_recv_hdrs); +			WARN_ON(ic->i_recv_hdrs_dma); +			WARN_ON(ic->i_ack); +		}  		if (ic->i_sends)  			rds_ib_send_clear_ring(ic); @@ -1071,9 +1157,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)  		ic->i_pd = NULL;  		ic->i_send_cq = NULL;  		ic->i_recv_cq = NULL; -		ic->i_send_hdrs = NULL; -		ic->i_recv_hdrs = NULL; -		ic->i_ack = NULL;  	}  	BUG_ON(ic->rds_ibdev); @@ -1099,8 +1182,9 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)  	ic->i_flowctl = 0;  	atomic_set(&ic->i_credits, 0); -	rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); -	rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); +	/* Re-init rings, but retain sizes. */ +	rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr); +	rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);  	if (ic->i_ibinc) {  		rds_inc_put(&ic->i_ibinc->ii_inc); @@ -1147,8 +1231,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)  	 * rds_ib_conn_shutdown() waits for these to be emptied so they  	 * must be initialized before it can be called.  	 */ -	rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); -	rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); +	rds_ib_ring_init(&ic->i_send_ring, 0); +	rds_ib_ring_init(&ic->i_recv_ring, 0);  	ic->conn = conn;  	conn->c_transport_data = ic; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index a0f99bbf362c..694d411dc72f 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  
You may choose to be licensed under the terms of the GNU @@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)  		recv->r_wr.num_sge = RDS_IB_RECV_SGE;  		sge = &recv->r_sge[0]; -		sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); +		sge->addr = ic->i_recv_hdrs_dma[i];  		sge->length = sizeof(struct rds_header);  		sge->lkey = ic->i_pd->local_dma_lkey; @@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,  	WARN_ON(ret != 1);  	sge = &recv->r_sge[0]; -	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); +	sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];  	sge->length = sizeof(struct rds_header);  	sge = &recv->r_sge[1]; @@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,  	}  	data_len -= sizeof(struct rds_header); -	ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; +	ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];  	/* Validate the checksum. */  	if (!rds_message_verify_checksum(ihdr)) { @@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,  	} else {  		/* We expect errors as the qp is drained during shutdown */  		if (rds_conn_up(conn) || rds_conn_connecting(conn)) -			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", +			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",  					  &conn->c_laddr, &conn->c_faddr,  					  conn->c_tos, wc->status, -					  ib_wc_status_msg(wc->status)); +					  ib_wc_status_msg(wc->status), +					  wc->vendor_err);  	}  	/* rds_ib_process_recv() doesn't always consume the frag, and diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe6237dafe2..d1cc1d7778d8 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)  		send->s_wr.ex.imm_data = 0;  		sge = &send->s_sge[0]; -		sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); +		sge->addr = ic->i_send_hdrs_dma[i]; +  		sge->length = sizeof(struct rds_header);  		sge->lkey = ic->i_pd->local_dma_lkey; @@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)  	/* We expect errors as the qp is drained during shutdown */  	if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { -		rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", +		rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",  				  &conn->c_laddr, &conn->c_faddr,  				  conn->c_tos, wc->status, -				  ib_wc_status_msg(wc->status)); +				  ib_wc_status_msg(wc->status), wc->vendor_err);  	}  } @@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,  		send->s_queued = jiffies;  		send->s_op = NULL; -		send->s_sge[0].addr = ic->i_send_hdrs_dma -			+ (pos * sizeof(struct rds_header)); +		send->s_sge[0].addr = ic->i_send_hdrs_dma[pos]; +  		send->s_sge[0].length = sizeof(struct rds_header); -		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); +		memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, +		       sizeof(struct rds_header)); +  		/* Set up the data, if present */  		if (i < work_alloc @@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,  			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);  		if (ic->i_flowctl && adv_credits) { -			struct rds_header *hdr = &ic->i_send_hdrs[pos]; +			
struct rds_header *hdr = ic->i_send_hdrs[pos];  			/* add credit and redo the header checksum */  			hdr->h_credit = adv_credits; | 

