Diffstat (limited to 'drivers/infiniband/hw/mlx4/mr.c')
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 286
1 file changed, 261 insertions(+), 25 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index e6f77f63da75..4975f3e6596e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -87,50 +87,286 @@ err_free:
 	return ERR_PTR(err);
 }
 
+enum {
+	MLX4_MAX_MTT_SHIFT = 31
+};
+
+static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
+					struct mlx4_mtt *mtt,
+					u64 mtt_size, u64 mtt_shift, u64 len,
+					u64 cur_start_addr, u64 *pages,
+					int *start_index, int *npages)
+{
+	u64 cur_end_addr = cur_start_addr + len;
+	u64 cur_end_addr_aligned = 0;
+	u64 mtt_entries;
+	int err = 0;
+	int k;
+
+	len += (cur_start_addr & (mtt_size - 1ULL));
+	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
+	len += (cur_end_addr_aligned - cur_end_addr);
+	if (len & (mtt_size - 1ULL)) {
+		pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
+			len, mtt_size);
+		return -EINVAL;
+	}
+
+	mtt_entries = (len >> mtt_shift);
+
+	/*
+	 * Align the MTT start address to the mtt_size.
+	 * Required to handle cases when the MR starts in the middle of an MTT
+	 * record. Was not required in old code since the physical addresses
+	 * provided by the dma subsystem were page aligned, which was also the
+	 * MTT size.
+	 */
+	cur_start_addr = round_down(cur_start_addr, mtt_size);
+	/* A new block is started ... */
+	for (k = 0; k < mtt_entries; ++k) {
+		pages[*npages] = cur_start_addr + (mtt_size * k);
+		(*npages)++;
+		/*
+		 * Be friendly to mlx4_write_mtt() and pass it chunks of
+		 * appropriate size.
+		 */
+		if (*npages == PAGE_SIZE / sizeof(u64)) {
+			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
+					     *npages, pages);
+			if (err)
+				return err;
+
+			(*start_index) += *npages;
+			*npages = 0;
+		}
+	}
+
+	return 0;
+}
+
+static inline u64 alignment_of(u64 ptr)
+{
+	return ilog2(ptr & (~(ptr - 1)));
+}
+
+static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
+				       u64 current_block_end,
+				       u64 block_shift)
+{
+	/* Check whether the alignment of the new block is aligned as well as
+	 * the previous block.
+	 * Block address must start with zeros till size of entity_size.
+	 */
+	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
+		/*
+		 * It is not as well aligned as the previous block-reduce the
+		 * mtt size accordingly. Here we take the last right bit which
+		 * is 1.
+		 */
+		block_shift = alignment_of(next_block_start);
+
+	/*
+	 * Check whether the alignment of the end of previous block - is it
+	 * aligned as well as the start of the block
+	 */
+	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
+		/*
+		 * It is not as well aligned as the start of the block -
+		 * reduce the mtt size accordingly.
+		 */
+		block_shift = alignment_of(current_block_end);
+
+	return block_shift;
+}
+
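The alignment_of() helper added above returns ilog2() of the value's lowest set bit: ptr & ~(ptr - 1) is the two's-complement idiom equivalent to ptr & -ptr, so the result is the largest shift S for which ptr is a multiple of 1 << S. That is exactly what mlx4_ib_umem_calc_block_mtt() needs when it lowers block_shift to match a poorly aligned block boundary. A minimal user-space sketch of the helper, not part of the patch (__builtin_ctzll() stands in for the kernel's ilog2(), and the sample addresses are invented):

	/* Standalone sketch of alignment_of(): log2 of the lowest set bit. */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t alignment_of(uint64_t ptr)
	{
		/* ptr & ~(ptr - 1) isolates the lowest set bit; ctzll is
		 * undefined for 0, as ilog2() effectively is in the kernel. */
		return __builtin_ctzll(ptr & ~(ptr - 1));
	}

	int main(void)
	{
		/* 0x203000 is only 4 KiB aligned; 0x400000 is 4 MiB aligned. */
		printf("%llu\n", (unsigned long long)alignment_of(0x203000)); /* 12 */
		printf("%llu\n", (unsigned long long)alignment_of(0x400000)); /* 22 */
		return 0;
	}
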
 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 			   struct ib_umem *umem)
 {
 	u64 *pages;
-	int i, k, entry;
-	int n;
-	int len;
+	u64 len = 0;
 	int err = 0;
+	u64 mtt_size;
+	u64 cur_start_addr = 0;
+	u64 mtt_shift;
+	int start_index = 0;
+	int npages = 0;
 	struct scatterlist *sg;
+	int i;
 
 	pages = (u64 *) __get_free_page(GFP_KERNEL);
 	if (!pages)
 		return -ENOMEM;
 
-	i = n = 0;
+	mtt_shift = mtt->page_shift;
+	mtt_size = 1ULL << mtt_shift;
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> mtt->page_shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] = sg_dma_address(sg) +
-				(k << umem->page_shift);
-			/*
-			 * Be friendly to mlx4_write_mtt() and
-			 * pass it chunks of appropriate size.
-			 */
-			if (i == PAGE_SIZE / sizeof (u64)) {
-				err = mlx4_write_mtt(dev->dev, mtt, n,
-						     i, pages);
-				if (err)
-					goto out;
-				n += i;
-				i = 0;
-			}
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+		if (cur_start_addr + len == sg_dma_address(sg)) {
+			/* still the same block */
+			len += sg_dma_len(sg);
+			continue;
 		}
+		/*
+		 * A new block is started ...
+		 * If len is malaligned, write an extra mtt entry to cover the
+		 * misaligned area (round up the division)
+		 */
+		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
+						   mtt_shift, len,
+						   cur_start_addr,
+						   pages, &start_index,
+						   &npages);
+		if (err)
+			goto out;
+
+		cur_start_addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+	}
+
+	/* Handle the last block */
+	if (len > 0) {
+		/*
+		 * If len is malaligned, write an extra mtt entry to cover
+		 * the misaligned area (round up the division)
+		 */
+		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
+						   mtt_shift, len,
+						   cur_start_addr, pages,
+						   &start_index, &npages);
+		if (err)
+			goto out;
 	}
 
-	if (i)
-		err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+	if (npages)
+		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
 
 out:
 	free_page((unsigned long) pages);
 	return err;
 }
 
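The rewritten mlx4_ib_umem_write_mtt() above no longer emits one MTT entry per hardware page of each scatter entry. It first merges DMA-contiguous scatter entries into a single block, then hands each completed block to mlx4_ib_umem_write_mtt_block(), which rounds the block out to the MTT size and flushes entries to mlx4_write_mtt() one page-full at a time. A user-space sketch of just the coalescing and rounding (the chunk addresses and all names are illustrative, not the kernel API):

	/* Sketch of the coalescing loop: merge adjacent DMA chunks into
	 * blocks, then expand each block into entries of 1 << mtt_shift. */
	#include <stdint.h>
	#include <stdio.h>

	struct chunk { uint64_t addr, len; };

	static void write_block(uint64_t start, uint64_t len, unsigned mtt_shift)
	{
		uint64_t mtt_size = 1ULL << mtt_shift;
		uint64_t end = start + len;

		/* Mirror the round_down/round_up in the kernel helper. */
		start &= ~(mtt_size - 1);
		end = (end + mtt_size - 1) & ~(mtt_size - 1);
		for (uint64_t a = start; a < end; a += mtt_size)
			printf("MTT entry -> %#llx\n", (unsigned long long)a);
	}

	int main(void)
	{
		/* Two contiguous chunks followed by a discontiguous one. */
		struct chunk sg[] = {
			{ 0x100000, 0x1000 },
			{ 0x101000, 0x1000 },
			{ 0x300000, 0x2000 },
		};
		uint64_t start = 0, len = 0;

		for (unsigned i = 0; i < 3; i++) {
			if (start + len == sg[i].addr) {	/* same block: extend */
				len += sg[i].len;
				continue;
			}
			if (len)				/* flush previous block */
				write_block(start, len, 12);
			start = sg[i].addr;
			len = sg[i].len;
		}
		if (len)					/* last block */
			write_block(start, len, 12);
		return 0;
	}

With these sample chunks, the first two merge into one 8 KiB block (two 4 KiB MTT entries) and the third starts a new block (two more entries), mirroring what the kernel loop would pass to mlx4_write_mtt().
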
+/*
+ * Calculate optimal mtt size based on contiguous pages.
+ * Function will return also the number of pages that are not aligned to the
+ * calculated mtt_size to be added to total number of pages. For that we should
+ * check the first chunk length & last chunk length and if not aligned to
+ * mtt_size we should increment the non_aligned_pages number. All chunks in the
+ * middle already handled as part of mtt shift calculation for both their start
+ * & end addresses.
+ */
+int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
+				       int *num_of_mtts)
+{
+	u64 block_shift = MLX4_MAX_MTT_SHIFT;
+	u64 min_shift = umem->page_shift;
+	u64 last_block_aligned_end = 0;
+	u64 current_block_start = 0;
+	u64 first_block_start = 0;
+	u64 current_block_len = 0;
+	u64 last_block_end = 0;
+	struct scatterlist *sg;
+	u64 current_block_end;
+	u64 misalignment_bits;
+	u64 next_block_start;
+	u64 total_len = 0;
+	int i;
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+		/*
+		 * Initialization - save the first chunk start as the
+		 * current_block_start - block means contiguous pages.
+		 */
+		if (current_block_len == 0 && current_block_start == 0) {
+			current_block_start = sg_dma_address(sg);
+			first_block_start = current_block_start;
+			/*
+			 * Find the bits that are different between the physical
+			 * address and the virtual address for the start of the
+			 * MR.
+			 * umem_get aligned the start_va to a page boundary.
+			 * Therefore, we need to align the start va to the same
+			 * boundary.
+			 * misalignment_bits is needed to handle the case of a
+			 * single memory region. In this case, the rest of the
+			 * logic will not reduce the block size.  If we use a
+			 * block size which is bigger than the alignment of the
+			 * misalignment bits, we might use the virtual page
+			 * number instead of the physical page number, resulting
+			 * in access to the wrong data.
+			 */
+			misalignment_bits =
+			(start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
+			^ current_block_start;
+			block_shift = min(alignment_of(misalignment_bits),
+					  block_shift);
+		}
+
+		/*
+		 * Go over the scatter entries and check if they continue the
+		 * previous scatter entry.
+		 */
+		next_block_start = sg_dma_address(sg);
+		current_block_end = current_block_start + current_block_len;
+		/* If we have a split (non-contig.) between two blocks */
+		if (current_block_end != next_block_start) {
+			block_shift = mlx4_ib_umem_calc_block_mtt
+					(next_block_start,
+					 current_block_end,
+					 block_shift);
+
+			/*
+			 * If we reached the minimum shift for 4k page we stop
+			 * the loop.
+			 */
+			if (block_shift <= min_shift)
+				goto end;
+
+			/*
+			 * If not saved yet we are in first block - we save the
+			 * length of first block to calculate the
+			 * non_aligned_pages number at the end.
+			 */
+			total_len += current_block_len;
+
+			/* Start a new block */
+			current_block_start = next_block_start;
+			current_block_len = sg_dma_len(sg);
+			continue;
+		}
+		/* The scatter entry is another part of the current block,
+		 * increase the block size.
+		 * An entry in the scatter can be larger than 4k (page) as of
+		 * dma mapping which merge some blocks together.
+		 */
+		current_block_len += sg_dma_len(sg);
+	}
+
+	/* Account for the last block in the total len */
+	total_len += current_block_len;
+	/* Add to the first block the misalignment that it suffers from. */
+	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
+	last_block_end = current_block_start + current_block_len;
+	last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
+	total_len += (last_block_aligned_end - last_block_end);
+
+	if (total_len & ((1ULL << block_shift) - 1ULL))
+		pr_warn("misaligned total length detected (%llu, %llu)!",
+			total_len, block_shift);
+
+	*num_of_mtts = total_len >> block_shift;
+end:
+	if (block_shift < min_shift) {
+		/*
+		 * If shift is less than the min we set a warning and return the
+		 * min shift.
+		 */
+		pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n",
+			block_shift);
+
+		block_shift = min_shift;
+	}
+	return block_shift;
+}
+
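mlx4_ib_umem_calc_optimal_mtt_size() starts from MLX4_MAX_MTT_SHIFT (31, i.e. up to 2 GiB covered by a single entry) and lowers block_shift to the alignment of every block boundary it meets, and of the VA/PA misalignment bits, but never below the umem page shift; the total length, padded at both ends to the chosen MTT size, then yields *num_of_mtts. A rough worked example of the shift bound, assuming 4 KiB pages and invented addresses (not kernel code):

	/* Sketch of the block_shift bounding done above. */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t alignment_of(uint64_t v)
	{
		return __builtin_ctzll(v & ~(v - 1));
	}

	int main(void)
	{
		uint64_t start_va = 0x7f0000000000;	/* page-aligned user VA */
		uint64_t pa = 0x20400000;		/* 4 MiB-aligned first block */
		uint64_t shift = 31;			/* MLX4_MAX_MTT_SHIFT */

		/* VA/PA misalignment bound, as in the first-chunk branch. */
		uint64_t mis = (start_va & ~0xfffULL) ^ pa;
		if (mis && alignment_of(mis) < shift)
			shift = alignment_of(mis);

		/* A discontinuity at a 2 MiB-aligned address bounds it further. */
		uint64_t next_block_start = 0x20600000;
		if (next_block_start & ((1ULL << shift) - 1))
			shift = alignment_of(next_block_start);

		printf("block_shift = %llu -> MTT size %llu KiB\n",
		       (unsigned long long)shift,
		       (unsigned long long)((1ULL << shift) / 1024));
		return 0;
	}

Here the XOR of the virtual and physical start addresses is 4 MiB aligned, bounding the shift to 22, and the discontinuity at 0x20600000 (only 2 MiB aligned) lowers it to 21, i.e. one MTT entry per 2 MiB.
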
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -155,7 +391,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	n = ib_umem_page_count(mr->umem);
-	shift = mr->umem->page_shift;
+	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
 
 	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
 			    convert_access(access_flags), n, shift, &mr->mmr);
@@ -406,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 		goto err_free_mr;
 
 	mr->max_pages = max_num_sg;
-
 	err = mlx4_mr_enable(dev->dev, &mr->mmr);
 	if (err)
 		goto err_free_pl;
@@ -417,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 	return &mr->ibmr;
 
 err_free_pl:
+	mr->ibmr.device = pd->device;
 	mlx4_free_priv_pages(mr);
 err_free_mr:
 	(void) mlx4_mr_free(dev->dev, &mr->mmr);
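The one-line change in mlx4_ib_reg_user_mr() is what activates all of the above: shift is now taken from mlx4_ib_umem_calc_optimal_mtt_size(), which also overwrites n with the number of optimally sized MTT entries before mlx4_mr_alloc() is called. The mlx4_ib_alloc_mr() hunks look like a separate tidy-up: mr->ibmr.device is set before the err_free_pl path calls mlx4_free_priv_pages(), presumably because that helper dereferences the device pointer, which is not otherwise initialized this early. A back-of-envelope illustration of the MTT saving for a physically contiguous MR (numbers invented):

	/* Entries needed for a 4 MiB contiguous MR, fixed 4 KiB pages vs.
	 * the optimal shift computed above. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long long len = 4ULL << 20;
		printf("shift 12: %llu MTT entries\n", len >> 12);	/* 1024 */
		printf("shift 22: %llu MTT entries\n", len >> 22);	/* 1 */
		return 0;
	}
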

