Diffstat (limited to 'drivers/infiniband/ulp/srp')
-rw-r--r--   drivers/infiniband/ulp/srp/ib_srp.c   179
-rw-r--r--   drivers/infiniband/ulp/srp/ib_srp.h     1
2 files changed, 146 insertions, 34 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 427dee3a3ab0..7ecb1970cb87 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
 static bool allow_ext_sg;
 static bool prefer_fr = true;
 static bool register_always = true;
+static bool never_register;
 static int topspin_workarounds = 1;
 
 module_param(srp_sg_tablesize, uint, 0444);
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
 MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");
 
+module_param(never_register, bool, 0444);
+MODULE_PARM_DESC(never_register, "Never register memory");
+
 static const struct kernel_param_ops srp_tmo_ops;
 
 static int srp_reconnect_delay = 10;
@@ -447,16 +451,16 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
 
 /**
  * srp_destroy_qp() - destroy an RDMA queue pair
- * @ch: SRP RDMA channel.
+ * @qp: RDMA queue pair.
  *
 * Drain the qp before destroying it. This avoids that the receive
 * completion handler can access the queue pair while it is
 * being destroyed.
 */
-static void srp_destroy_qp(struct srp_rdma_ch *ch)
+static void srp_destroy_qp(struct ib_qp *qp)
 {
-	ib_drain_rq(ch->qp);
-	ib_destroy_qp(ch->qp);
+	ib_drain_rq(qp);
+	ib_destroy_qp(qp);
 }
 
 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
@@ -468,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
-	const int m = dev->use_fast_reg ? 3 : 1;
+	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
	int ret;
 
	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -529,7 +533,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
	}
 
	if (ch->qp)
-		srp_destroy_qp(ch);
+		srp_destroy_qp(ch->qp);
	if (ch->recv_cq)
		ib_free_cq(ch->recv_cq);
	if (ch->send_cq)
@@ -553,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
	return 0;
 
 err_qp:
-	srp_destroy_qp(ch);
+	srp_destroy_qp(qp);
 
 err_send_cq:
	ib_free_cq(send_cq);
@@ -596,7 +600,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
			ib_destroy_fmr_pool(ch->fmr_pool);
	}
 
-	srp_destroy_qp(ch);
+	srp_destroy_qp(ch->qp);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);
@@ -849,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
 
	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
-		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
				  GFP_KERNEL);
		if (!mr_list)
			goto out;
@@ -1298,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
	srp_handle_qp_err(cq, wc, "FAST REG");
 }
 
+/*
+ * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
+ * where to start in the first element. If sg_offset_p != NULL then
+ * *sg_offset_p is updated to the offset in state->sg[retval] of the first
+ * byte that has not yet been mapped.
+ */
 static int srp_map_finish_fr(struct srp_map_state *state,
			     struct srp_request *req,
-			     struct srp_rdma_ch *ch, int sg_nents)
+			     struct srp_rdma_ch *ch, int sg_nents,
+			     unsigned int *sg_offset_p)
 {
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,9 +1327,13 @@ static int srp_map_finish_fr(struct srp_map_state *state,
	WARN_ON_ONCE(!dev->use_fast_reg);
 
	if (sg_nents == 1 && target->global_mr) {
-		srp_map_desc(state, sg_dma_address(state->sg),
-			     sg_dma_len(state->sg),
+		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+
+		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
+			     sg_dma_len(state->sg) - sg_offset,
			     target->global_mr->rkey);
+		if (sg_offset_p)
+			*sg_offset_p = 0;
		return 1;
	}
@@ -1329,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
	rkey = ib_inc_rkey(desc->mr->rkey);
	ib_update_fast_reg_key(desc->mr, rkey);
 
-	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
-	if (unlikely(n < 0))
+	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
+			 dev->mr_page_size);
+	if (unlikely(n < 0)) {
+		srp_fr_pool_put(ch->fr_pool, &desc, 1);
+		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
+			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
+			 sg_offset_p ? *sg_offset_p : -1, n);
		return n;
+	}
+
+	WARN_ON_ONCE(desc->mr->length == 0);
 
	req->reg_cqe.done = srp_reg_mr_err_done;
@@ -1353,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
		     desc->mr->length, desc->mr->rkey);
 
	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		WARN_ON_ONCE(err == -ENOMEM);
		return err;
+	}
 
	return n;
 }
@@ -1411,7 +1436,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
	state->pages = req->map_page;
	state->fmr.next = req->fmr_list;
-	state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
+	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
 
	for_each_sg(scat, sg, count, i) {
		ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1430,9 +1455,11 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
 {
+	unsigned int sg_offset = 0;
+
	state->desc = req->indirect_desc;
	state->fr.next = req->fr_list;
-	state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
	state->sg = scat;
 
	if (count == 0)
@@ -1441,7 +1468,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
	while (count) {
		int i, n;
 
-		n = srp_map_finish_fr(state, req, ch, count);
+		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
		if (unlikely(n < 0))
			return n;
@@ -1501,14 +1528,15 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
	if (dev->use_fast_reg) {
		state.sg = idb_sg;
-		sg_set_buf(idb_sg, req->indirect_desc, idb_len);
+		sg_init_one(idb_sg, req->indirect_desc, idb_len);
		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
 #endif
-		ret = srp_map_finish_fr(&state, req, ch, 1);
+		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
		if (ret < 0)
			return ret;
+		WARN_ON_ONCE(ret < 1);
	} else if (dev->use_fmr) {
		state.pages = idb_pages;
		state.pages[0] = (req->indirect_dma_addr &
@@ -1526,6 +1554,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
	return 0;
 }
 
+#if defined(DYNAMIC_DATA_DEBUG)
+static void srp_check_mapping(struct srp_map_state *state,
+			      struct srp_rdma_ch *ch, struct srp_request *req,
+			      struct scatterlist *scat, int count)
+{
+	struct srp_device *dev = ch->target->srp_host->srp_dev;
+	struct srp_fr_desc **pfr;
+	u64 desc_len = 0, mr_len = 0;
+	int i;
+
+	for (i = 0; i < state->ndesc; i++)
+		desc_len += be32_to_cpu(req->indirect_desc[i].len);
+	if (dev->use_fast_reg)
+		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
+			mr_len += (*pfr)->mr->length;
+	else if (dev->use_fmr)
+		for (i = 0; i < state->nmdesc; i++)
+			mr_len += be32_to_cpu(req->indirect_desc[i].len);
+	if (desc_len != scsi_bufflen(req->scmnd) ||
+	    mr_len > scsi_bufflen(req->scmnd))
+		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
+		       scsi_bufflen(req->scmnd), desc_len, mr_len,
+		       state->ndesc, state->nmdesc);
+}
+#endif
+
 /**
  * srp_map_data() - map SCSI data buffer onto an SRP request
  * @scmnd: SCSI command to map
@@ -1611,6 +1665,15 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
	if (ret < 0)
		goto unmap;
 
+#if defined(DYNAMIC_DEBUG)
+	{
+		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
+			"Memory mapping consistency check");
+		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+			srp_check_mapping(&state, ch, req, scat, count);
+	}
+#endif
+
	/* We've mapped the request, now pull as much of the indirect
	 * descriptor table as we can into the command buffer. If this
	 * target is not using an external indirect table, we are
@@ -2575,6 +2638,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
 }
 
+static int srp_slave_alloc(struct scsi_device *sdev)
+{
+	struct Scsi_Host *shost = sdev->host;
+	struct srp_target_port *target = host_to_target(shost);
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+	struct ib_device *ibdev = srp_dev->dev;
+
+	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+		blk_queue_virt_boundary(sdev->request_queue,
+					~srp_dev->mr_page_mask);
+
+	return 0;
+}
+
 static int srp_slave_configure(struct scsi_device *sdev)
 {
	struct Scsi_Host *shost = sdev->host;
@@ -2766,6 +2843,7 @@ static struct scsi_host_template srp_template = {
	.module				= THIS_MODULE,
	.name				= "InfiniBand SRP initiator",
	.proc_name			= DRV_NAME,
+	.slave_alloc			= srp_slave_alloc,
	.slave_configure		= srp_slave_configure,
	.info				= srp_target_info,
	.queuecommand			= srp_queuecommand,
@@ -2840,7 +2918,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
		goto out;
	}
 
-	pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
		 dev_name(&target->scsi_host->shost_gendev),
		 srp_sdev_count(target->scsi_host));
@@ -3172,6 +3250,7 @@ static ssize_t srp_create_target(struct device *dev,
	struct srp_device *srp_dev = host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	int ret, node_idx, node, cpu, i;
+	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
	bool multich = false;
 
	target_host = scsi_host_alloc(&srp_template,
@@ -3228,8 +3307,33 @@ static ssize_t srp_create_target(struct device *dev,
		target->sg_tablesize = target->cmd_sg_cnt;
	}
 
+	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
+		/*
+		 * FR and FMR can only map one HCA page per entry. If the
+		 * start address is not aligned on a HCA page boundary two
+		 * entries will be used for the head and the tail although
+		 * these two entries combined contain at most one HCA page of
+		 * data. Hence the "+ 1" in the calculation below.
+		 *
+		 * The indirect data buffer descriptor is contiguous so the
+		 * memory for that buffer will only be registered if
+		 * register_always is true. Hence add one to mr_per_cmd if
+		 * register_always has been set.
+		 */
+		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
+			(ilog2(srp_dev->mr_page_size) - 9);
+		mr_per_cmd = register_always +
+			(target->scsi_host->max_sectors + 1 +
+			 max_sectors_per_mr - 1) / max_sectors_per_mr;
+		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
+			 target->scsi_host->max_sectors,
+			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+			 max_sectors_per_mr, mr_per_cmd);
+	}
+
	target_host->sg_tablesize = target->sg_tablesize;
-	target->mr_pool_size = target->scsi_host->can_queue;
+	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
+	target->mr_per_cmd = mr_per_cmd;
	target->indirect_size = target->sg_tablesize *
				sizeof (struct srp_direct_buf);
	target->max_iu_len = sizeof (struct srp_cmd) +
@@ -3426,17 +3530,6 @@ static void srp_add_one(struct ib_device *device)
	if (!srp_dev)
		return;
 
-	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
-			    device->map_phys_fmr && device->unmap_fmr);
-	srp_dev->has_fr = (device->attrs.device_cap_flags &
-			   IB_DEVICE_MEM_MGT_EXTENSIONS);
-	if (!srp_dev->has_fmr && !srp_dev->has_fr)
-		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
-
-	srp_dev->use_fast_reg = (srp_dev->has_fr &&
-				 (!srp_dev->has_fmr || prefer_fr));
-	srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
-
	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3447,8 +3540,25 @@ static void srp_add_one(struct ib_device *device)
	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr	= device->attrs.max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
+	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
+		 device->attrs.max_mr_size, srp_dev->mr_page_size,
+		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);
+
+	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+			    device->map_phys_fmr && device->unmap_fmr);
+	srp_dev->has_fr = (device->attrs.device_cap_flags &
+			   IB_DEVICE_MEM_MGT_EXTENSIONS);
+	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
+		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+	} else if (!never_register &&
+		   device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+		srp_dev->use_fast_reg = (srp_dev->has_fr &&
+					 (!srp_dev->has_fmr || prefer_fr));
+		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
+	}
+
	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr = min_t(u32,
						  srp_dev->max_pages_per_mr,
@@ -3468,7 +3578,8 @@ static void srp_add_one(struct ib_device *device)
	if (IS_ERR(srp_dev->pd))
		goto free_dev;
 
-	if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+	if (never_register || !register_always ||
+	    (!srp_dev->has_fmr && !srp_dev->has_fr)) {
		srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
						   IB_ACCESS_LOCAL_WRITE |
						   IB_ACCESS_REMOTE_READ |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a00914cdd44e..26bb9b0a7a63 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -203,6 +203,7 @@ struct srp_target_port {
	unsigned int		scsi_id;
	unsigned int		sg_tablesize;
	int			mr_pool_size;
+	int			mr_per_cmd;
	int			queue_size;
	int			req_ring_size;
	int			comp_vector;
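The mr_per_cmd arithmetic added to srp_create_target() above can be checked in isolation. The sketch below reproduces the same formula with made-up values (4 KiB HCA pages, 256 pages per MR, max_sectors = 1024, can_queue = 64); these numbers and the helper ilog2_u32() are illustrative assumptions, not values taken from the patch.

/*
 * Standalone sketch of the mr_per_cmd calculation introduced in the
 * srp_create_target() hunk above. All numeric inputs are assumed values
 * chosen only to make the arithmetic concrete.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	const bool register_always = true;        /* module parameter */
	const unsigned int mr_page_size = 4096;   /* HCA page size (assumed) */
	const unsigned int max_pages_per_mr = 256;/* pages per MR (assumed) */
	const unsigned int max_sectors = 1024;    /* SCSI host limit (assumed) */
	const unsigned int can_queue = 64;        /* outstanding commands (assumed) */

	/* One MR covers max_pages_per_mr HCA pages; convert to 512-byte sectors. */
	unsigned int max_sectors_per_mr = max_pages_per_mr <<
		(ilog2_u32(mr_page_size) - 9);

	/*
	 * The "+ 1" models an unaligned buffer that needs an extra head/tail
	 * page entry; the register_always term accounts for registering the
	 * indirect data buffer descriptor as well.
	 */
	unsigned int mr_per_cmd = register_always +
		(max_sectors + 1 + max_sectors_per_mr - 1) / max_sectors_per_mr;

	printf("max_sectors_per_mr = %u\n", max_sectors_per_mr);     /* 2048 */
	printf("mr_per_cmd         = %u\n", mr_per_cmd);             /* 2 */
	printf("mr_pool_size       = %u\n", can_queue * mr_per_cmd); /* 128 */
	return 0;
}

With these assumed values each command needs at most two registrations, so the per-target MR pool is sized to can_queue * mr_per_cmd entries rather than just can_queue, which is exactly the change made to target->mr_pool_size in the hunk above.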