Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--   drivers/nvme/host/core.c        59
-rw-r--r--   drivers/nvme/host/fabrics.c      7
-rw-r--r--   drivers/nvme/host/fabrics.h      9
-rw-r--r--   drivers/nvme/host/fc.c         184
-rw-r--r--   drivers/nvme/host/multipath.c   43
-rw-r--r--   drivers/nvme/host/nvme.h        11
-rw-r--r--   drivers/nvme/host/pci.c         59
-rw-r--r--   drivers/nvme/host/rdma.c        20
8 files changed, 150 insertions, 242 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f431c32774f3..7aeca5db7916 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -120,8 +120,12 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 	int ret;
 
 	ret = nvme_reset_ctrl(ctrl);
-	if (!ret)
+	if (!ret) {
 		flush_work(&ctrl->reset_work);
+		if (ctrl->state != NVME_CTRL_LIVE)
+			ret = -ENETRESET;
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
@@ -265,7 +269,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	switch (new_state) {
 	case NVME_CTRL_ADMIN_ONLY:
 		switch (old_state) {
-		case NVME_CTRL_RECONNECTING:
+		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -276,7 +280,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		switch (old_state) {
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_RECONNECTING:
+		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -294,9 +298,9 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 			break;
 		}
 		break;
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		switch (old_state) {
-		case NVME_CTRL_LIVE:
+		case NVME_CTRL_NEW:
 		case NVME_CTRL_RESETTING:
 			changed = true;
 			/* FALLTHRU */
@@ -309,7 +313,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		case NVME_CTRL_LIVE:
 		case NVME_CTRL_ADMIN_ONLY:
 		case NVME_CTRL_RESETTING:
-		case NVME_CTRL_RECONNECTING:
+		case NVME_CTRL_CONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -518,9 +522,11 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
 		u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
 
-		range[n].cattr = cpu_to_le32(0);
-		range[n].nlb = cpu_to_le32(nlb);
-		range[n].slba = cpu_to_le64(slba);
+		if (n < segments) {
+			range[n].cattr = cpu_to_le32(0);
+			range[n].nlb = cpu_to_le32(nlb);
+			range[n].slba = cpu_to_le64(slba);
+		}
 
 		n++;
 	}
@@ -794,13 +800,9 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 
 static int nvme_keep_alive(struct nvme_ctrl *ctrl)
 {
-	struct nvme_command c;
 	struct request *rq;
 
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_keep_alive;
-
-	rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
+	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
 			NVME_QID_ANY);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
@@ -832,6 +834,8 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
 		return;
 
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
 	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
@@ -1117,14 +1121,19 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 static void nvme_update_formats(struct nvme_ctrl *ctrl)
 {
-	struct nvme_ns *ns;
+	struct nvme_ns *ns, *next;
+	LIST_HEAD(rm_list);
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		if (ns->disk && nvme_revalidate_disk(ns->disk))
-			nvme_ns_remove(ns);
+		if (ns->disk && nvme_revalidate_disk(ns->disk)) {
+			list_move_tail(&ns->list, &rm_list);
+		}
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
+
+	list_for_each_entry_safe(ns, next, &rm_list, list)
+		nvme_ns_remove(ns);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -2687,7 +2696,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
 		[NVME_CTRL_LIVE]	= "live",
 		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
 		[NVME_CTRL_RESETTING]	= "resetting",
-		[NVME_CTRL_RECONNECTING]= "reconnecting",
+		[NVME_CTRL_CONNECTING]	= "connecting",
 		[NVME_CTRL_DELETING]	= "deleting",
 		[NVME_CTRL_DEAD]	= "dead",
 	};
@@ -2835,7 +2844,7 @@ out:
 }
 
 static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
-		struct nvme_id_ns *id, bool *new)
+		struct nvme_id_ns *id)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	bool is_shared = id->nmic & (1 << 0);
@@ -2851,8 +2860,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 			ret = PTR_ERR(head);
 			goto out_unlock;
 		}
-
-		*new = true;
 	} else {
 		struct nvme_ns_ids ids;
 
@@ -2864,8 +2871,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 			ret = -EINVAL;
 			goto out_unlock;
 		}
-
-		*new = false;
 	}
 
 	list_add_tail(&ns->siblings, &head->list);
@@ -2936,7 +2941,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct nvme_id_ns *id;
 	char disk_name[DISK_NAME_LEN];
 	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
-	bool new = true;
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
@@ -2962,7 +2966,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (id->ncap == 0)
 		goto out_free_id;
 
-	if (nvme_init_ns_head(ns, nsid, id, &new))
+	if (nvme_init_ns_head(ns, nsid, id))
 		goto out_free_id;
 
 	nvme_setup_streams_ns(ctrl, ns);
@@ -3028,9 +3032,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
 			ns->disk->disk_name);
 
-	if (new)
-		nvme_mpath_add_disk(ns->head);
-	nvme_mpath_add_disk_links(ns);
+	nvme_mpath_add_disk(ns->head);
 	return;
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
@@ -3050,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		return;
 
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-		nvme_mpath_remove_disk_links(ns);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group);
 		if (ns->ndev)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5dd4ceefed8f..8f0f34d06d46 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
  */
 int nvmf_register_transport(struct nvmf_transport_ops *ops)
 {
-	if (!ops->create_ctrl || !ops->module)
+	if (!ops->create_ctrl)
 		return -EINVAL;
 
 	down_write(&nvmf_transports_rwsem);
@@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				ret = -EINVAL;
 				goto out;
 			}
+			if (opts->discovery_nqn) {
+				pr_debug("Ignoring nr_io_queues value for discovery controller\n");
+				break;
+			}
+
 			opts->nr_io_queues = min_t(unsigned int,
 					num_online_cpus(), token);
 			break;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 25b19f722f5b..a3145d90c1d2 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -171,13 +171,14 @@ static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
 	    cmd->common.opcode != nvme_fabrics_command ||
 	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
 		/*
-		 * Reconnecting state means transport disruption, which can take
-		 * a long time and even might fail permanently, fail fast to
-		 * give upper layers a chance to failover.
+		 * Connecting state means transport disruption or initial
+		 * establishment, which can take a long time and even might
+		 * fail permanently, fail fast to give upper layers a chance
+		 * to failover.
 		 * Deleting state means that the ctrl will never accept commands
 		 * again, fail it permanently.
 		 */
-		if (ctrl->state == NVME_CTRL_RECONNECTING ||
+		if (ctrl->state == NVME_CTRL_CONNECTING ||
 		    ctrl->state == NVME_CTRL_DELETING) {
 			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
 			return BLK_STS_IOERR;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b856d7c919d2..1dc1387b7134 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -55,9 +55,7 @@ struct nvme_fc_queue {
 
 enum nvme_fcop_flags {
 	FCOP_FLAGS_TERMIO	= (1 << 0),
-	FCOP_FLAGS_RELEASED	= (1 << 1),
-	FCOP_FLAGS_COMPLETE	= (1 << 2),
-	FCOP_FLAGS_AEN		= (1 << 3),
+	FCOP_FLAGS_AEN		= (1 << 1),
 };
 
 struct nvmefc_ls_req_op {
@@ -532,7 +530,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
 {
 	switch (ctrl->ctrl.state) {
 	case NVME_CTRL_NEW:
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		/*
 		 * As all reconnects were suppressed, schedule a
 		 * connect.
@@ -777,7 +775,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 		}
 		break;
 
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		/*
 		 * The association has already been terminated and the
 		 * controller is attempting reconnects.  No need to do anything
@@ -1208,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
 
 	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
-	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
+	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
 	/* Linux supports only Dynamic controllers */
 	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
 	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
@@ -1323,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
 	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
 	conn_rqst->connect_cmd.qid  = cpu_to_be16(queue->qnum);
-	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);
+	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
 
 	lsop->queue = queue;
 	lsreq->rqstaddr = conn_rqst;
@@ -1470,7 +1468,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 /* *********************** NVME Ctrl Routines **************************** */
 
-static void __nvme_fc_final_op_cleanup(struct request *rq);
 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
@@ -1512,13 +1509,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 static int
 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 {
-	int state;
+	unsigned long flags;
+	int opstate;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+	if (opstate != FCPOP_STATE_ACTIVE)
+		atomic_set(&op->state, opstate);
+	else if (ctrl->flags & FCCTRL_TERMIO)
+		ctrl->iocnt++;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-	if (state != FCPOP_STATE_ACTIVE) {
-		atomic_set(&op->state, state);
+	if (opstate != FCPOP_STATE_ACTIVE)
 		return -ECANCELED;
-	}
 
 	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
 					&ctrl->rport->remoteport,
@@ -1532,60 +1535,26 @@ static void
 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-	unsigned long flags;
-	int i, ret;
-
-	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
-			continue;
-
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO) {
-			ctrl->iocnt++;
-			aen_op->flags |= FCOP_FLAGS_TERMIO;
-		}
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-
-		ret = __nvme_fc_abort_op(ctrl, aen_op);
-		if (ret) {
-			/*
-			 * if __nvme_fc_abort_op failed the io wasn't
-			 * active. Thus this call path is running in
-			 * parallel to the io complete. Treat as non-error.
-			 */
+	int i;
 
-			/* back out the flags/counters */
-			spin_lock_irqsave(&ctrl->lock, flags);
-			if (ctrl->flags & FCCTRL_TERMIO)
-				ctrl->iocnt--;
-			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
-			spin_unlock_irqrestore(&ctrl->lock, flags);
-			return;
-		}
-	}
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+		__nvme_fc_abort_op(ctrl, aen_op);
 }
 
-static inline int
+static inline void
 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
-		struct nvme_fc_fcp_op *op)
+		struct nvme_fc_fcp_op *op, int opstate)
 {
 	unsigned long flags;
-	bool complete_rq = false;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+	if (opstate == FCPOP_STATE_ABORTED) {
+		spin_lock_irqsave(&ctrl->lock, flags);
 		if (ctrl->flags & FCCTRL_TERMIO) {
 			if (!--ctrl->iocnt)
 				wake_up(&ctrl->ioabort_wait);
 		}
+		spin_unlock_irqrestore(&ctrl->lock, flags);
 	}
-	if (op->flags & FCOP_FLAGS_RELEASED)
-		complete_rq = true;
-	else
-		op->flags |= FCOP_FLAGS_COMPLETE;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	return complete_rq;
 }
 
 static void
@@ -1601,6 +1570,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
 	bool terminate_assoc = true;
+	int opstate;
 
 	/*
 	 * WARNING:
@@ -1639,11 +1609,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * association to be terminated.
 	 */
 
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
 				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
-	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
-			op->flags & FCOP_FLAGS_TERMIO)
+	if (opstate == FCPOP_STATE_ABORTED)
 		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 	else if (freq->status)
 		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1679,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-		__nvme_fc_fcpop_chk_teardowns(ctrl, op);
+		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
@@ -1722,13 +1693,11 @@ done:
 	if (status &&
 	    (blk_queue_dying(rq->q) ||
 	     ctrl->ctrl.state == NVME_CTRL_NEW ||
-	     ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+	     ctrl->ctrl.state == NVME_CTRL_CONNECTING))
 		status |= cpu_to_le16(NVME_SC_DNR << 1);
 
-	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
-		__nvme_fc_final_op_cleanup(rq);
-	else
-		nvme_end_request(rq, status, result);
+	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+	nvme_end_request(rq, status, result);
 
 check_error:
 	if (terminate_assoc)
@@ -2415,46 +2384,16 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
 }
 
 static void
-__nvme_fc_final_op_cleanup(struct request *rq)
+nvme_fc_complete_rq(struct request *rq)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
-	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
-			FCOP_FLAGS_COMPLETE);
 
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
 	nvme_fc_ctrl_put(ctrl);
-
-}
-
-static void
-nvme_fc_complete_rq(struct request *rq)
-{
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_ctrl *ctrl = op->ctrl;
-	unsigned long flags;
-	bool completed = false;
-
-	/*
-	 * the core layer, on controller resets after calling
-	 * nvme_shutdown_ctrl(), calls complete_rq without our
-	 * calling blk_mq_complete_request(), thus there may still
-	 * be live i/o outstanding with the LLDD. Means transport has
-	 * to track complete calls vs fcpio_done calls to know what
-	 * path to take on completes and dones.
-	 */
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (op->flags & FCOP_FLAGS_COMPLETE)
-		completed = true;
-	else
-		op->flags |= FCOP_FLAGS_RELEASED;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	if (completed)
-		__nvme_fc_final_op_cleanup(rq);
 }
 
 /*
@@ -2476,35 +2415,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 	struct nvme_ctrl *nctrl = data;
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-	unsigned long flags;
-	int status;
 
 	if (!blk_mq_request_started(req))
 		return;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (ctrl->flags & FCCTRL_TERMIO) {
-		ctrl->iocnt++;
-		op->flags |= FCOP_FLAGS_TERMIO;
-	}
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	status = __nvme_fc_abort_op(ctrl, op);
-	if (status) {
-		/*
-		 * if __nvme_fc_abort_op failed the io wasn't
-		 * active. Thus this call path is running in
-		 * parallel to the io complete. Treat as non-error.
-		 */
-
-		/* back out the flags/counters */
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
-		op->flags &= ~FCOP_FLAGS_TERMIO;
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		return;
-	}
+	__nvme_fc_abort_op(ctrl, op);
 }
 
 
@@ -2566,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 		goto out_free_tag_set;
 	}
 
-	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_cleanup_blk_queue;
 
-	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_delete_hw_queues;
 
@@ -2617,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_free_io_queues;
 
-	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_free_io_queues;
 
-	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_delete_hw_queues;
 
@@ -2717,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	nvme_fc_init_queue(ctrl, 0);
 
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-				NVME_AQ_BLK_MQ_DEPTH);
+				NVME_AQ_DEPTH);
 	if (ret)
 		goto out_free_queue;
 
 	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-				NVME_AQ_BLK_MQ_DEPTH,
-				(NVME_AQ_BLK_MQ_DEPTH / 4));
+				NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
 	if (ret)
 		goto out_delete_hw_queue;
 
@@ -2751,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
@@ -2784,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		opts->queue_size = ctrl->ctrl.maxcmd;
 	}
 
+	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+		/* warn if sqsize is lower than queue_size */
+		dev_warn(ctrl->ctrl.device,
+			"queue_size %zu > ctrl sqsize %u, clamping down\n",
+			opts->queue_size, ctrl->ctrl.sqsize + 1);
+		opts->queue_size = ctrl->ctrl.sqsize + 1;
+	}
+
 	ret = nvme_fc_init_aen_ops(ctrl);
 	if (ret)
 		goto out_term_aen_ops;
@@ -2943,7 +2865,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
 	bool recon = true;
 
-	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
 		return;
 
 	if (portptr->port_state == FC_OBJSTATE_ONLINE)
@@ -2991,10 +2913,10 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
-			"to RECONNECTING\n", ctrl->cnum);
+			"to CONNECTING\n", ctrl->cnum);
 		return;
 	}
 
@@ -3195,7 +3117,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	 * transport errors (frame drop, LS failure) inherently must kill
 	 * the association.  The transport is coded so that any command used
 	 * to create the association (prior to a LIVE state transition
-	 * while NEW or RECONNECTING) will fail if it completes in error or
+	 * while NEW or CONNECTING) will fail if it completes in error or
 	 * times out.
 	 *
 	 * As such: as the connect request was mostly likely due to a
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3b211d9e58b8..060f69e03427 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -198,30 +198,16 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
 
-	device_add_disk(&head->subsys->dev, head->disk);
-	if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
-			&nvme_ns_id_attr_group))
-		pr_warn("%s: failed to create sysfs group for identification\n",
-			head->disk->disk_name);
-}
-
-void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-	struct kobject *slave_disk_kobj, *holder_disk_kobj;
-
-	if (!ns->head->disk)
-		return;
-
-	slave_disk_kobj = &disk_to_dev(ns->disk)->kobj;
-	if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj,
-			kobject_name(slave_disk_kobj)))
-		return;
-	holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj;
-	if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj,
-			kobject_name(holder_disk_kobj)))
-		sysfs_remove_link(ns->head->disk->slave_dir,
-			kobject_name(slave_disk_kobj));
+	mutex_lock(&head->subsys->lock);
+	if (!(head->disk->flags & GENHD_FL_UP)) {
+		device_add_disk(&head->subsys->dev, head->disk);
+		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+				&nvme_ns_id_attr_group))
+			pr_warn("%s: failed to create sysfs group for identification\n",
+				head->disk->disk_name);
+	}
+	mutex_unlock(&head->subsys->lock);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -238,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	blk_cleanup_queue(head->disk->queue);
 	put_disk(head->disk);
 }
-
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-	if (!ns->head->disk)
-		return;
-
-	sysfs_remove_link(ns->disk->part0.holder_dir,
-			kobject_name(&disk_to_dev(ns->head->disk)->kobj));
-	sysfs_remove_link(ns->head->disk->slave_dir,
-			kobject_name(&disk_to_dev(ns->disk)->kobj));
-}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..d733b14ede9d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -123,7 +123,7 @@ enum nvme_ctrl_state {
 	NVME_CTRL_LIVE,
 	NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
 	NVME_CTRL_RESETTING,
-	NVME_CTRL_RECONNECTING,
+	NVME_CTRL_CONNECTING,
 	NVME_CTRL_DELETING,
 	NVME_CTRL_DEAD,
 };
@@ -183,6 +183,7 @@ struct nvme_ctrl {
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
+	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
 
 	/* Power saving configuration */
@@ -409,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns_head *head);
-void nvme_mpath_add_disk_links(struct nvme_ns *ns);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns);
 
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
@@ -453,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 }
-static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-}
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..b6f43b738f03 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1141,7 +1141,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
 	switch (dev->ctrl.state) {
 	case NVME_CTRL_RESETTING:
-	case NVME_CTRL_RECONNECTING:
+	case NVME_CTRL_CONNECTING:
 		return false;
 	default:
 		break;
@@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	if (!(csts & NVME_CSTS_CFS) && !nssro)
 		return false;
 
-	/* If PCI error recovery process is happening, we cannot reset or
-	 * the recovery mechanism will surely fail.
-	 */
-	if (pci_channel_offline(to_pci_dev(dev->dev)))
-		return false;
-
 	return true;
 }
 
@@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_command cmd;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
+	/* If PCI error recovery process is happening, we cannot reset or
+	 * the recovery mechanism will surely fail.
+	 */
+	mb();
+	if (pci_channel_offline(to_pci_dev(dev->dev)))
+		return BLK_EH_RESET_TIMER;
+
 	/*
 	 * Reset immediately if the controller is failed
 	 */
@@ -1215,13 +1216,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * cancellation error. All outstanding requests are completed on
 	 * shutdown, so we return BLK_EH_HANDLED.
 	 */
-	if (dev->ctrl.state == NVME_CTRL_RESETTING) {
+	switch (dev->ctrl.state) {
+	case NVME_CTRL_CONNECTING:
+	case NVME_CTRL_RESETTING:
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_HANDLED;
+	default:
+		break;
 	}
 
 	/*
@@ -1364,18 +1369,14 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				int qid, int depth)
 {
-	if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
-	} else {
-		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-					&nvmeq->sq_dma_addr, GFP_KERNEL);
-		if (!nvmeq->sq_cmds)
-			return -ENOMEM;
-	}
+	/* CMB SQEs will be mapped before creation */
+	if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+		return 0;
+	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+					    &nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -1449,10 +1450,17 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
+						      dev->ctrl.page_size);
+		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
+		nvmeq->sq_cmds_io = dev->cmb + offset;
+	}
+
 	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
-		return result;
+		goto release_vector;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
@@ -1466,9 +1474,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	return result;
 
  release_sq:
+	dev->online_queues--;
 	adapter_delete_sq(dev, qid);
  release_cq:
 	adapter_delete_cq(dev, qid);
+ release_vector:
+	nvmeq->cq_vector = -1;
 	return result;
 }
 
@@ -1903,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	int result, nr_io_queues;
 	unsigned long size;
 
-	nr_io_queues = num_present_cpus();
+	nr_io_queues = num_possible_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;
@@ -2288,12 +2299,12 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_dev_disable(dev, false);
 
 	/*
-	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
 	 * initializing procedure here.
 	 */
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
 		dev_warn(dev->ctrl.device,
-			"failed to mark controller RECONNECTING\n");
+			"failed to mark controller CONNECTING\n");
 		goto out;
 	}
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2bc059f7d73c..4d84a73ee12d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -887,7 +887,7 @@ free_ctrl:
 static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 {
 	/* If we are resetting/deleting then do nothing */
-	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
 			ctrl->ctrl.state == NVME_CTRL_LIVE);
 		return;
@@ -973,7 +973,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure should never happen */
 		WARN_ON_ONCE(1);
 		return;
@@ -1051,7 +1051,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
 
-	if (!blk_rq_bytes(rq))
+	if (!blk_rq_payload_bytes(rq))
 		return;
 
 	if (req->mr) {
@@ -1166,7 +1166,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
-	if (!blk_rq_bytes(rq))
+	if (!blk_rq_payload_bytes(rq))
 		return nvme_rdma_set_sg_null(c);
 
 	req->sg_table.sgl = req->first_sgl;
@@ -1756,7 +1756,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure should never happen */
 		WARN_ON_ONCE(1);
 		return;
@@ -1784,11 +1784,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	return;
 
 out_fail:
-	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_rdma_shutdown_ctrl(ctrl, true);
-	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
+	++ctrl->ctrl.nr_reconnects;
+	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1942,6 +1939,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
 
+	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
+	WARN_ON_ONCE(!changed);
+
 	ret = nvme_rdma_configure_admin_queue(ctrl, true);
 	if (ret)
 		goto out_kfree_queues;

