389 files changed, 23916 insertions, 7565 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 842530fcd41b..de3b8fce5164 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -256,7 +256,7 @@ config ACPI_PROCESSOR
 
 config ACPI_IPMI
 	tristate "IPMI"
-	depends on IPMI_SI
+	depends on IPMI_HANDLER
 	default n
 	help
 	  This driver enables the ACPI to access the BMC controller. And it
@@ -469,9 +469,8 @@ config ACPI_WATCHDOG
 
 config ACPI_EXTLOG
 	tristate "Extended Error Log support"
-	depends on X86_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC && EDAC
 	select UEFI_CPER
-	select RAS
 	default n
 	help
 	  Certain usages such as Predictive Failure Analysis (PFA) require
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index d78065cc9324..b1aacfc62b1f 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -50,6 +50,7 @@ acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
 acpi-y				+= sysfs.o
 acpi-y				+= property.o
 acpi-$(CONFIG_X86)		+= acpi_cmos_rtc.o
+acpi-$(CONFIG_X86)		+= x86/utils.o
 acpi-$(CONFIG_DEBUG_FS)		+= debugfs.o
 acpi-$(CONFIG_ACPI_NUMA)	+= numa.o
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index f71b756b05c4..8f52483219ba 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -57,12 +57,23 @@ static int acpi_ac_add(struct acpi_device *device);
 static int acpi_ac_remove(struct acpi_device *device);
 static void acpi_ac_notify(struct acpi_device *device, u32 event);
 
+struct acpi_ac_bl {
+	const char *hid;
+	int hrv;
+};
+
 static const struct acpi_device_id ac_device_ids[] = {
 	{"ACPI0003", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, ac_device_ids);
 
+/* Lists of PMIC ACPI HIDs with an (often better) native charger driver */
+static const struct acpi_ac_bl acpi_ac_blacklist[] = {
+	{ "INT33F4", -1 }, /* X-Powers AXP288 PMIC */
+	{ "INT34D3",  3 }, /* Intel Cherrytrail Whiskey Cove PMIC */
+};
+
 #ifdef CONFIG_PM_SLEEP
 static int acpi_ac_resume(struct device *dev);
 #endif
@@ -424,11 +435,20 @@ static int acpi_ac_remove(struct acpi_device *device)
 
 static int __init acpi_ac_init(void)
 {
+	unsigned int i;
 	int result;
 
 	if (acpi_disabled)
 		return -ENODEV;
 
+	for (i = 0; i < ARRAY_SIZE(acpi_ac_blacklist); i++)
+		if (acpi_dev_present(acpi_ac_blacklist[i].hid, "1",
+				     acpi_ac_blacklist[i].hrv)) {
+			pr_info(PREFIX "AC: found native %s PMIC, not loading\n",
+				acpi_ac_blacklist[i].hid);
+			return -ENODEV;
+		}
+
 #ifdef CONFIG_ACPI_PROCFS_POWER
 	acpi_ac_dir = acpi_lock_ac_dir();
 	if (!acpi_ac_dir)
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 26696b693e63..8f57648f318b 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -106,6 +106,16 @@ static const struct apd_device_desc vulcan_spi_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 133000000,
 };
+
+static const struct apd_device_desc hip07_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 200000000,
+};
+
+static const struct apd_device_desc hip08_i2c_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 250000000,
+};
 #endif
 
 #else
@@ -169,6 +179,8 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
 #ifdef CONFIG_ARM64
 	{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
 	{ "BRCM900D", APD_ADDR(vulcan_spi_desc) },
+	{ "HISI0A21", APD_ADDR(hip07_i2c_desc) },
+	{ "HISI0A22", APD_ADDR(hip08_i2c_desc) },
 #endif
 	{ }
 };
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a15270a806fc..502ea4dc2080 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -229,7 +229,7 @@ static int __init extlog_init(void)
 	if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
 		return -ENODEV;
 
-	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
+	if (edac_get_report_status() == EDAC_REPORTING_FORCE) {
 		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
 		return -EPERM;
 	}
@@ -285,8 +285,8 @@ static int __init extlog_init(void)
 	 * eMCA event report method has higher priority than EDAC method,
 	 * unless EDAC event report method is mandatory.
 	 */
-	old_edac_report_status = get_edac_report_status();
-	set_edac_report_status(EDAC_REPORTING_DISABLED);
+	old_edac_report_status = edac_get_report_status();
+	edac_set_report_status(EDAC_REPORTING_DISABLED);
 	mce_register_decode_chain(&extlog_mce_dec);
 	/* enable OS to be involved to take over management from BIOS */
 	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,7 +308,7 @@ err:
 
 static void __exit extlog_exit(void)
 {
-	set_edac_report_status(old_edac_report_status);
+	edac_set_report_status(old_edac_report_status);
 	mce_unregister_decode_chain(&extlog_mce_dec);
 	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
 	if (extlog_l1_addr)
diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c
index 747c2ba98534..1b64419e2fec 100644
--- a/drivers/acpi/acpi_ipmi.c
+++ b/drivers/acpi/acpi_ipmi.c
@@ -429,8 +429,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
 	if (msg->recv_type == IPMI_RESPONSE_RECV_TYPE &&
 	    msg->msg.data_len == 1) {
 		if (msg->msg.data[0] == IPMI_TIMEOUT_COMPLETION_CODE) {
-			dev_WARN_ONCE(dev, true,
-				      "Unexpected response (timeout).\n");
+			dev_dbg_once(dev, "Unexpected response (timeout).\n");
 			tx_msg->msg_done = ACPI_IPMI_TIMEOUT;
 		}
 		goto out_comp;
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 5edfd9c49044..10347e3d73ad 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -143,6 +143,22 @@ static void lpss_deassert_reset(struct lpss_private_data *pdata)
 	writel(val, pdata->mmio_base + offset);
 }
 
+/*
+ * BYT PWM used for backlight control by the i915 driver on systems without
+ * the Crystal Cove PMIC.
+ */
+static struct pwm_lookup byt_pwm_lookup[] = {
+	PWM_LOOKUP_WITH_MODULE("80860F09:00", 0, "0000:00:02.0",
+			       "pwm_backlight", 0, PWM_POLARITY_NORMAL,
+			       "pwm-lpss-platform"),
+};
+
+static void byt_pwm_setup(struct lpss_private_data *pdata)
+{
+	if (!acpi_dev_present("INT33FD", NULL, -1))
+		pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
+}
+
 #define LPSS_I2C_ENABLE			0x6c
 
 static void byt_i2c_setup(struct lpss_private_data *pdata)
@@ -200,6 +216,7 @@ static const struct lpss_device_desc lpt_sdio_dev_desc = {
 
 static const struct lpss_device_desc byt_pwm_dev_desc = {
 	.flags = LPSS_SAVE_CTX,
+	.setup = byt_pwm_setup,
 };
 
 static const struct lpss_device_desc bsw_pwm_dev_desc = {
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 03250e1f1103..88cd949003f3 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -121,11 +121,14 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
 	if (IS_ERR(pdev))
 		dev_err(&adev->dev, "platform device creation failed: %ld\n",
 			PTR_ERR(pdev));
-	else
+	else {
+		set_dev_node(&pdev->dev, acpi_get_node(adev->handle));
 		dev_dbg(&adev->dev, "created platform device %s\n",
 			dev_name(&pdev->dev));
+	}
 
 	kfree(resources);
+
 	return pdev;
 }
 EXPORT_SYMBOL_GPL(acpi_create_platform_device);
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 0143135b3abe..f098e25b6b41 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -388,11 +388,6 @@ static int acpi_processor_add(struct acpi_device *device,
 	if (result) /* Processor is not physically present or unavailable */
 		return 0;
 
-#ifdef CONFIG_SMP
-	if (pr->id >= setup_max_cpus && pr->id != 0)
-		return 0;
-#endif
-
 	BUG_ON(pr->id >= nr_cpu_ids);
 
 	/*
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index d00bc0ef87a6..e88fe3632dd6 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -73,6 +73,10 @@ module_param(report_key_events, int, 0644);
 MODULE_PARM_DESC(report_key_events,
 	"0: none, 1: output changes, 2: brightness changes, 3: all");
 
+/*
+ * Whether the struct acpi_video_device_attrib::device_id_scheme bit should be
+ * assumed even if not actually set.
+ */
 static bool device_id_scheme = false;
 module_param(device_id_scheme, bool, 0444);
 
@@ -88,6 +92,18 @@ static int acpi_video_bus_remove(struct acpi_device *device);
 static void acpi_video_bus_notify(struct acpi_device *device, u32 event);
 void acpi_video_detect_exit(void);
 
+/*
+ * Indices in the _BCL method response: the first two items are special,
+ * the rest are all supported levels.
+ *
+ * See page 575 of the ACPI spec 3.0
+ */
+enum acpi_video_level_idx {
+	ACPI_VIDEO_AC_LEVEL,		/* level when machine has full power */
+	ACPI_VIDEO_BATTERY_LEVEL,	/* level when machine is on batteries */
+	ACPI_VIDEO_FIRST_LEVEL,		/* actual supported levels begin here */
+};
+
 static const struct acpi_device_id video_device_ids[] = {
 	{ACPI_VIDEO_HID, 0},
 	{"", 0},
@@ -132,7 +148,15 @@ struct acpi_video_device_attrib {
 				   the VGA device. */
 	u32 pipe_id:3;		/* For VGA multiple-head devices. */
 	u32 reserved:10;	/* Must be 0 */
-	u32 device_id_scheme:1;	/* Device ID Scheme */
+
+	/*
+	 * The device ID might not actually follow the scheme described by this
+	 * struct acpi_video_device_attrib. If it does, then this bit
+	 * device_id_scheme is set; otherwise, other fields should be ignored.
+	 *
+	 * (but also see the global flag device_id_scheme)
+	 */
+	u32 device_id_scheme:1;
 };
 
 struct acpi_video_enumerated_device {
@@ -217,20 +241,16 @@ static int acpi_video_get_brightness(struct backlight_device *bd)
 
 	if (acpi_video_device_lcd_get_level_current(vd, &cur_level, false))
 		return -EINVAL;
-	for (i = 2; i < vd->brightness->count; i++) {
+	for (i = ACPI_VIDEO_FIRST_LEVEL; i < vd->brightness->count; i++) {
 		if (vd->brightness->levels[i] == cur_level)
-			/*
-			 * The first two entries are special - see page 575
-			 * of the ACPI spec 3.0
-			 */
-			return i - 2;
+			return i - ACPI_VIDEO_FIRST_LEVEL;
 	}
 	return 0;
 }
 
 static int acpi_video_set_brightness(struct backlight_device *bd)
 {
-	int request_level = bd->props.brightness + 2;
+	int request_level = bd->props.brightness + ACPI_VIDEO_FIRST_LEVEL;
 	struct acpi_video_device *vd = bl_get_data(bd);
 
 	cancel_delayed_work(&vd->switch_brightness_work);
@@ -244,18 +264,18 @@ static const struct backlight_ops acpi_backlight_ops = {
 };
 
 /* thermal cooling device callbacks */
-static int video_get_max_state(struct thermal_cooling_device *cooling_dev, unsigned
-			       long *state)
+static int video_get_max_state(struct thermal_cooling_device *cooling_dev,
+			       unsigned long *state)
 {
 	struct acpi_device *device = cooling_dev->devdata;
 	struct acpi_video_device *video = acpi_driver_data(device);
 
-	*state = video->brightness->count - 3;
+	*state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1;
 	return 0;
 }
 
-static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsigned
-			       long *state)
+static int video_get_cur_state(struct thermal_cooling_device *cooling_dev,
+			       unsigned long *state)
 {
 	struct acpi_device *device = cooling_dev->devdata;
 	struct acpi_video_device *video = acpi_driver_data(device);
@@ -264,7 +284,8 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsig
 
 	if (acpi_video_device_lcd_get_level_current(video, &level, false))
 		return -EINVAL;
-	for (offset = 2; offset < video->brightness->count; offset++)
+	for (offset = ACPI_VIDEO_FIRST_LEVEL; offset < video->brightness->count;
+	     offset++)
 		if (level == video->brightness->levels[offset]) {
 			*state = video->brightness->count - offset - 1;
 			return 0;
@@ -280,7 +301,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st
 	struct acpi_video_device *video = acpi_driver_data(device);
 	int level;
 
-	if (state >= video->brightness->count - 2)
+	if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL)
 		return -EINVAL;
 
 	state = video->brightness->count - state;
@@ -345,10 +366,12 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
 	}
 
 	device->brightness->curr = level;
-	for (state = 2; state < device->brightness->count; state++)
+	for (state = ACPI_VIDEO_FIRST_LEVEL; state < device->brightness->count;
+	     state++)
 		if (level == device->brightness->levels[state]) {
 			if (device->backlight)
-				device->backlight->props.brightness = state - 2;
+				device->backlight->props.brightness =
+					state - ACPI_VIDEO_FIRST_LEVEL;
 			return 0;
 		}
 
@@ -530,14 +553,16 @@ acpi_video_bqc_value_to_level(struct acpi_video_device *device,
 
 	if (device->brightness->flags._BQC_use_index) {
 		/*
-		 * _BQC returns an index that doesn't account for
-		 * the first 2 items with special meaning, so we need
-		 * to compensate for that by offsetting ourselves
+		 * _BQC returns an index that doesn't account for the first 2
+		 * items with special meaning (see enum acpi_video_level_idx),
+		 * so we need to compensate for that by offsetting ourselves
 		 */
 		if (device->brightness->flags._BCL_reversed)
-			bqc_value = device->brightness->count - 3 - bqc_value;
+			bqc_value = device->brightness->count -
+				ACPI_VIDEO_FIRST_LEVEL - 1 - bqc_value;
 
-		level = device->brightness->levels[bqc_value + 2];
+		level = device->brightness->levels[bqc_value +
+		                                   ACPI_VIDEO_FIRST_LEVEL];
 	} else {
 		level = bqc_value;
 	}
@@ -571,7 +596,8 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 
 			*level = acpi_video_bqc_value_to_level(device, *level);
 
-			for (i = 2; i < device->brightness->count; i++)
+			for (i = ACPI_VIDEO_FIRST_LEVEL;
+			     i < device->brightness->count; i++)
 				if (device->brightness->levels[i] == *level) {
 					device->brightness->curr = *level;
 					return 0;
@@ -714,9 +740,37 @@ static int acpi_video_bqc_quirk(struct acpi_video_device *device,
 
 	/*
 	 * Some systems always report current brightness level as maximum
-	 * through _BQC, we need to test another value for them.
+	 * through _BQC, we need to test another value for them. However,
+	 * there is a subtlety:
+	 *
+	 * If the _BCL package ordering is descending, the first level
+	 * (br->levels[2]) is likely to be 0, and if the number of levels
+	 * matches the number of steps, we might confuse a returned level to
+	 * mean the index.
+	 *
+	 * For example:
+	 *
+	 *     current_level = max_level = 100
+	 *     test_level = 0
+	 *     returned level = 100
+	 *
+	 * In this case 100 means the level, not the index, and _BCM failed.
+	 * Still, if the _BCL package ordering is descending, the index of
+	 * level 0 is also 100, so we assume _BQC is indexed, when it's not.
+	 *
+	 * This causes all _BQC calls to return bogus values causing weird
+	 * behavior from the user's perspective.  For example:
+	 *
+	 * xbacklight -set 10; xbacklight -set 20;
+	 *
+	 * would flash to 90% and then slowly down to the desired level (20).
+	 *
+	 * The solution is simple; test anything other than the first level
+	 * (e.g. 1).
 	 */
-	test_level = current_level == max_level ? br->levels[3] : max_level;
+	test_level = current_level == max_level
+		? br->levels[ACPI_VIDEO_FIRST_LEVEL + 1]
+		: max_level;
 
 	result = acpi_video_device_lcd_set_level(device, test_level);
 	if (result)
@@ -730,8 +784,8 @@ static int acpi_video_bqc_quirk(struct acpi_video_device *device,
 		/* buggy _BQC found, need to find out if it uses index */
 		if (level < br->count) {
 			if (br->flags._BCL_reversed)
-				level = br->count - 3 - level;
-			if (br->levels[level + 2] == test_level)
+				level = br->count - ACPI_VIDEO_FIRST_LEVEL - 1 - level;
+			if (br->levels[level + ACPI_VIDEO_FIRST_LEVEL] == test_level)
 				br->flags._BQC_use_index = 1;
 		}
 
@@ -761,7 +815,7 @@ int acpi_video_get_levels(struct acpi_device *device,
 		goto out;
 	}
 
-	if (obj->package.count < 2) {
+	if (obj->package.count < ACPI_VIDEO_FIRST_LEVEL) {
 		result = -EINVAL;
 		goto out;
 	}
@@ -773,8 +827,13 @@ int acpi_video_get_levels(struct acpi_device *device,
 		goto out;
 	}
 
-	br->levels = kmalloc((obj->package.count + 2) * sizeof *(br->levels),
-				GFP_KERNEL);
+	/*
+	 * Note that we have to reserve 2 extra items (ACPI_VIDEO_FIRST_LEVEL),
+	 * in order to account for buggy BIOS which don't export the first two
+	 * special levels (see below)
+	 */
+	br->levels = kmalloc((obj->package.count + ACPI_VIDEO_FIRST_LEVEL) *
+	                     sizeof(*br->levels), GFP_KERNEL);
 	if (!br->levels) {
 		result = -ENOMEM;
 		goto out_free;
@@ -788,7 +847,8 @@ int acpi_video_get_levels(struct acpi_device *device,
 		}
 		value = (u32) o->integer.value;
 		/* Skip duplicate entries */
-		if (count > 2 && br->levels[count - 1] == value)
+		if (count > ACPI_VIDEO_FIRST_LEVEL
+		    && br->levels[count - 1] == value)
 			continue;
 
 		br->levels[count] = value;
@@ -804,27 +864,30 @@ int acpi_video_get_levels(struct acpi_device *device,
 	 * In this case, the first two elements in _BCL packages
 	 * are also supported brightness levels that OS should take care of.
 	 */
-	for (i = 2; i < count; i++) {
-		if (br->levels[i] == br->levels[0])
+	for (i = ACPI_VIDEO_FIRST_LEVEL; i < count; i++) {
+		if (br->levels[i] == br->levels[ACPI_VIDEO_AC_LEVEL])
 			level_ac_battery++;
-		if (br->levels[i] == br->levels[1])
+		if (br->levels[i] == br->levels[ACPI_VIDEO_BATTERY_LEVEL])
 			level_ac_battery++;
 	}
 
-	if (level_ac_battery < 2) {
-		level_ac_battery = 2 - level_ac_battery;
+	if (level_ac_battery < ACPI_VIDEO_FIRST_LEVEL) {
+		level_ac_battery = ACPI_VIDEO_FIRST_LEVEL - level_ac_battery;
 		br->flags._BCL_no_ac_battery_levels = 1;
-		for (i = (count - 1 + level_ac_battery); i >= 2; i--)
+		for (i = (count - 1 + level_ac_battery);
+		     i >= ACPI_VIDEO_FIRST_LEVEL; i--)
 			br->levels[i] = br->levels[i - level_ac_battery];
 		count += level_ac_battery;
-	} else if (level_ac_battery > 2)
+	} else if (level_ac_battery > ACPI_VIDEO_FIRST_LEVEL)
 		ACPI_ERROR((AE_INFO, "Too many duplicates in _BCL package"));
 
 	/* Check if the _BCL package is in a reversed order */
-	if (max_level == br->levels[2]) {
+	if (max_level == br->levels[ACPI_VIDEO_FIRST_LEVEL]) {
 		br->flags._BCL_reversed = 1;
-		sort(&br->levels[2], count - 2, sizeof(br->levels[2]),
-			acpi_video_cmp_level, NULL);
+		sort(&br->levels[ACPI_VIDEO_FIRST_LEVEL],
+		     count - ACPI_VIDEO_FIRST_LEVEL,
+		     sizeof(br->levels[ACPI_VIDEO_FIRST_LEVEL]),
+		     acpi_video_cmp_level, NULL);
 	} else if (max_level != br->levels[count - 1])
 		ACPI_ERROR((AE_INFO,
 			    "Found unordered _BCL package"));
@@ -894,7 +957,7 @@ acpi_video_init_brightness(struct acpi_video_device *device)
 	 * level_old is invalid (no matter whether it's a level
 	 * or an index). Set the backlight to max_level in this case.
 	 */
-	for (i = 2; i < br->count; i++)
+	for (i = ACPI_VIDEO_FIRST_LEVEL; i < br->count; i++)
 		if (level == br->levels[i])
 			break;
 	if (i == br->count || !level)
@@ -906,7 +969,8 @@ set_level:
 		goto out_free_levels;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-			  "found %d brightness levels\n", br->count - 2));
+	                  "found %d brightness levels\n",
+	                  br->count - ACPI_VIDEO_FIRST_LEVEL));
 	return 0;
 
 out_free_levels:
@@ -1297,7 +1361,7 @@ acpi_video_get_next_level(struct acpi_video_device *device,
 	max = max_below = 0;
 	min = min_above = 255;
 	/* Find closest level to level_current */
-	for (i = 2; i < device->brightness->count; i++) {
+	for (i = ACPI_VIDEO_FIRST_LEVEL; i < device->brightness->count; i++) {
 		l = device->brightness->levels[i];
 		if (abs(l - level_current) < abs(delta)) {
 			delta = l - level_current;
@@ -1307,7 +1371,7 @@ acpi_video_get_next_level(struct acpi_video_device *device,
 	}
 	/* Ajust level_current to closest available level */
 	level_current += delta;
-	for (i = 2; i < device->brightness->count; i++) {
+	for (i = ACPI_VIDEO_FIRST_LEVEL; i < device->brightness->count; i++) {
 		l = device->brightness->levels[i];
 		if (l < min)
 			min = l;
@@ -1680,7 +1744,8 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device)
 
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_FIRMWARE;
-	props.max_brightness = device->brightness->count - 3;
+	props.max_brightness =
+		device->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1;
 	device->backlight = backlight_device_register(name,
 						      parent,
 						      device,
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 32d93edbc479..dea65306b687 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -2,7 +2,7 @@
 # Makefile for ACPICA Core interpreter
 #
 
-ccflags-y			:= -Os -DBUILDING_ACPICA
+ccflags-y			:= -Os -D_LINUX -DBUILDING_ACPICA
 ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
 
 # use acpi.o to put all files here into acpi.o modparam namespace
diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
new file mode 100644
index 000000000000..c84223b60b35
--- /dev/null
+++ b/drivers/acpi/acpica/acconvert.h
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * Module Name: acapps - common include for ACPI applications/tools
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2017, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef _ACCONVERT
+#define _ACCONVERT
+
+/* Definitions for comment state */
+
+#define ASL_COMMENT_STANDARD    1
+#define ASLCOMMENT_INLINE       2
+#define ASL_COMMENT_OPEN_PAREN  3
+#define ASL_COMMENT_CLOSE_PAREN 4
+#define ASL_COMMENT_CLOSE_BRACE 5
+
+/* Definitions for comment print function*/
+
+#define AML_COMMENT_STANDARD    1
+#define AMLCOMMENT_INLINE       2
+#define AML_COMMENT_END_NODE    3
+#define AML_NAMECOMMENT         4
+#define AML_COMMENT_CLOSE_BRACE 5
+#define AML_COMMENT_ENDBLK      6
+#define AML_COMMENT_INCLUDE     7
+
+#ifdef ACPI_ASL_COMPILER
+/*
+ * cvcompiler
+ */
+void
+cv_process_comment(struct asl_comment_state current_state,
+		   char *string_buffer, int c1);
+
+void
+cv_process_comment_type2(struct asl_comment_state current_state,
+			 char *string_buffer);
+
+u32 cv_calculate_comment_lengths(union acpi_parse_object *op);
+
+void cv_process_comment_state(char input);
+
+char *cv_append_inline_comment(char *inline_comment, char *to_add);
+
+void cv_add_to_comment_list(char *to_add);
+
+void cv_place_comment(u8 type, char *comment_string);
+
+u32 cv_parse_op_block_type(union acpi_parse_object *op);
+
+struct acpi_comment_node *cv_comment_node_calloc(void);
+
+void cg_write_aml_def_block_comment(union acpi_parse_object *op);
+
+void
+cg_write_one_aml_comment(union acpi_parse_object *op,
+			 char *comment_to_print, u8 input_option);
+
+void cg_write_aml_comment(union acpi_parse_object *op);
+
+/*
+ * cvparser
+ */
+void
+cv_init_file_tree(struct acpi_table_header *table,
+		  u8 *aml_start, u32 aml_length);
+
+void cv_clear_op_comments(union acpi_parse_object *op);
+
+struct acpi_file_node *cv_filename_exists(char *filename,
+					  struct acpi_file_node *head);
+
+void cv_label_file_node(union acpi_parse_object *op);
+
+void
+cv_capture_list_comments(struct acpi_parse_state *parser_state,
+			 struct acpi_comment_node *list_head,
+			 struct acpi_comment_node *list_tail);
+
+void cv_capture_comments_only(struct acpi_parse_state *parser_state);
+
+void cv_capture_comments(struct acpi_walk_state *walk_state);
+
+void cv_transfer_comments(union acpi_parse_object *op);
+
+/*
+ * cvdisasm
+ */
+void cv_switch_files(u32 level, union acpi_parse_object *op);
+
+u8 cv_file_has_switched(union acpi_parse_object *op);
+
+void cv_close_paren_write_comment(union acpi_parse_object *op, u32 level);
+
+void cv_close_brace_write_comment(union acpi_parse_object *op, u32 level);
+
+void
+cv_print_one_comment_list(struct acpi_comment_node *comment_list, u32 level);
+
+void
+cv_print_one_comment_type(union acpi_parse_object *op,
+			  u8 comment_type, char *end_str, u32 level);
+
+#endif
+
+#endif				/* _ACCONVERT */
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 1d955fe216c4..abe8c316908c 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -370,6 +370,59 @@ ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]);
 
 #endif
 
+/*
+ * Meant for the -ca option.
+ */
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_inline_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_end_node_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_open_brace_comment, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_close_brace_comment, NULL);
+
+ACPI_INIT_GLOBAL(char *, acpi_gbl_root_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_parent_filename, NULL);
+ACPI_INIT_GLOBAL(char *, acpi_gbl_current_include_filename, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_head,
+		 NULL);
+ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_tail,
+		 NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_comment_addr_node,
+		 *acpi_gbl_comment_addr_list_head, NULL);
+
+ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL);
+
+ACPI_INIT_GLOBAL(struct acpi_file_node, *acpi_gbl_file_tree_root, NULL);
+
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_reg_comment_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_comment_addr_cache);
+ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_file_cache);
+
+ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE);
+
+ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_asl_conversion, FALSE);
+ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_conv_debug_file, NULL);
+
+ACPI_GLOBAL(char, acpi_gbl_table_sig[4]);
+
 /*****************************************************************************
  *
  * Application globals
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 8fd495e8fdce..f9b3f7fef462 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -53,7 +53,7 @@ typedef u32 acpi_mutex_handle;
 
 /* Total number of aml opcodes defined */
 
-#define AML_NUM_OPCODES                 0x82
+#define AML_NUM_OPCODES                 0x83
 
 /* Forward declarations */
 
@@ -754,21 +754,52 @@ union acpi_parse_value {
 #define ACPI_DISASM_ONLY_MEMBERS(a)
 #endif
 
+#if defined(ACPI_ASL_COMPILER)
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)  a;
+#else
+#define ACPI_CONVERTER_ONLY_MEMBERS(a)
+#endif
+
 #define ACPI_PARSE_COMMON \
-	union acpi_parse_object         *parent;        /* Parent op */\
-	u8                              descriptor_type; /* To differentiate various internal objs */\
-	u8                              flags;          /* Type of Op */\
-	u16                             aml_opcode;     /* AML opcode */\
-	u8                              *aml;           /* Address of declaration in AML */\
-	union acpi_parse_object         *next;          /* Next op */\
-	struct acpi_namespace_node      *node;          /* For use by interpreter */\
-	union acpi_parse_value          value;          /* Value or args associated with the opcode */\
-	u8                              arg_list_length; /* Number of elements in the arg list */\
-	ACPI_DISASM_ONLY_MEMBERS (\
-	u16                             disasm_flags;   /* Used during AML disassembly */\
-	u8                              disasm_opcode;  /* Subtype used for disassembly */\
-	char                            *operator_symbol;/* Used for C-style operator name strings */\
-	char                            aml_op_name[16])	/* Op name (debug only) */
+	union acpi_parse_object         *parent;            /* Parent op */\
+	u8                              descriptor_type;    /* To differentiate various internal objs */\
+	u8                              flags;              /* Type of Op */\
+	u16                             aml_opcode;         /* AML opcode */\
+	u8                              *aml;               /* Address of declaration in AML */\
+	union acpi_parse_object         *next;              /* Next op */\
+	struct acpi_namespace_node      *node;              /* For use by interpreter */\
+	union acpi_parse_value          value;              /* Value or args associated with the opcode */\
+	u8                              arg_list_length;    /* Number of elements in the arg list */\
+	 ACPI_DISASM_ONLY_MEMBERS (\
+	u16                             disasm_flags;       /* Used during AML disassembly */\
+	u8                              disasm_opcode;      /* Subtype used for disassembly */\
+	char                            *operator_symbol;   /* Used for C-style operator name strings */\
+	char                            aml_op_name[16])    /* Op name (debug only) */\
+	 ACPI_CONVERTER_ONLY_MEMBERS (\
+	char                            *inline_comment;    /* Inline comment */\
+	char                            *end_node_comment;  /* End of node comment */\
+	char                            *name_comment;      /* Comment associated with the first parameter of the name node */\
+	char                            *close_brace_comment; /* Comments that come after } on the same as } */\
+	struct acpi_comment_node        *comment_list;      /* comments that appears before this node */\
+	struct acpi_comment_node        *end_blk_comment;   /* comments that at the end of a block but before ) or } */\
+	char                            *cv_filename;       /* Filename associated with this node. Used for ASL/ASL+ converter */\
+	char                            *cv_parent_filename)	/* Parent filename associated with this node. Used for ASL/ASL+ converter */
+
+/* categories of comments */
+
+typedef enum {
+	STANDARD_COMMENT = 1,
+	INLINE_COMMENT,
+	ENDNODE_COMMENT,
+	OPENBRACE_COMMENT,
+	CLOSE_BRACE_COMMENT,
+	STD_DEFBLK_COMMENT,
+	END_DEFBLK_COMMENT,
+	FILENAME_COMMENT,
+	PARENTFILENAME_COMMENT,
+	ENDBLK_COMMENT,
+	INCLUDE_COMMENT
+} asl_comment_types;
 
 /* Internal opcodes for disasm_opcode field above */
 
@@ -784,9 +815,38 @@ union acpi_parse_value {
 #define ACPI_DASM_LNOT_SUFFIX           0x09	/* End  of a Lnot_equal (etc.) pair of opcodes */
 #define ACPI_DASM_HID_STRING            0x0A	/* String is a _HID or _CID */
 #define ACPI_DASM_IGNORE_SINGLE         0x0B	/* Ignore the opcode but not it's children */
-#define ACPI_DASM_SWITCH_PREDICATE      0x0C	/* Object is a predicate for a Switch or Case block */
-#define ACPI_DASM_CASE                  0x0D	/* If/Else is a Case in a Switch/Case block */
-#define ACPI_DASM_DEFAULT               0x0E	/* Else is a Default in a Switch/Case block */
+#define ACPI_DASM_SWITCH                0x0C	/* While is a Switch */
+#define ACPI_DASM_SWITCH_PREDICATE      0x0D	/* Object is a predicate for a Switch or Case block */
+#define ACPI_DASM_CASE                  0x0E	/* If/Else is a Case in a Switch/Case block */
+#define ACPI_DASM_DEFAULT               0x0F	/* Else is a Default in a Switch/Case block */
+
+/*
+ * List struct used in the -ca option
+ */
+struct acpi_comment_node {
+	char *comment;
+	struct acpi_comment_node *next;
+};
+
+struct acpi_comment_addr_node {
+	u8 *addr;
+	struct acpi_comment_addr_node *next;
+};
+
+/*
+ * File node - used for "Include" operator file stack and
+ * depdendency tree for the -ca option
+ */
+struct acpi_file_node {
+	void *file;
+	char *filename;
+	char *file_start;	/* Points to AML and indicates when the AML for this particular file starts. */
+	char *file_end;		/* Points to AML and indicates when the AML for this particular file ends. */
+	struct acpi_file_node *next;
+	struct acpi_file_node *parent;
+	u8 include_written;
+	struct acpi_comment_node *include_comment;
+};
 
 /*
  * Generic operation (for example:  If, While, Store)
@@ -813,6 +873,8 @@ struct acpi_parse_obj_asl {
 	ACPI_PARSE_COMMON union acpi_parse_object *child;
 	union acpi_parse_object *parent_method;
 	char *filename;
+	u8 file_changed;
+	char *parent_filename;
 	char *external_name;
 	char *namepath;
 	char name_seg[4];
@@ -842,6 +904,14 @@ union acpi_parse_object {
 	struct acpi_parse_obj_asl asl;
 };
 
+struct asl_comment_state {
+	u8 comment_type;
+	u32 spaces_before;
+	union acpi_parse_object *latest_parse_node;
+	union acpi_parse_object *parsing_paren_brace_node;
+	u8 capture_comments;
+};
+
 /*
  * Parse state - one state per parser invocation and each control
  * method.
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index c3337514e0ed..c7f0c96cc00f 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -493,4 +493,39 @@
 
 #define ACPI_IS_OCTAL_DIGIT(d)              (((char)(d) >= '0') && ((char)(d) <= '7'))
 
+/*
+ * Macors used for the ASL-/ASL+ converter utility
+ */
+#ifdef ACPI_ASL_COMPILER
+
+#define ASL_CV_LABEL_FILENODE(a)         cv_label_file_node(a);
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)   cv_capture_comments_only (a);
+#define ASL_CV_CAPTURE_COMMENTS(a)       cv_capture_comments (a);
+#define ASL_CV_TRANSFER_COMMENTS(a)      cv_transfer_comments (a);
+#define ASL_CV_CLOSE_PAREN(a,b)          cv_close_paren_write_comment(a,b);
+#define ASL_CV_CLOSE_BRACE(a,b)          cv_close_brace_write_comment(a,b);
+#define ASL_CV_SWITCH_FILES(a,b)         cv_switch_files(a,b);
+#define ASL_CV_CLEAR_OP_COMMENTS(a)       cv_clear_op_comments(a);
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d) cv_print_one_comment_type (a,b,c,d);
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b) cv_print_one_comment_list (a,b);
+#define ASL_CV_FILE_HAS_SWITCHED(a)       cv_file_has_switched(a)
+#define ASL_CV_INIT_FILETREE(a,b,c)      cv_init_file_tree(a,b,c);
+
+#else
+
+#define ASL_CV_LABEL_FILENODE(a)
+#define ASL_CV_CAPTURE_COMMENTS_ONLY(a)
+#define ASL_CV_CAPTURE_COMMENTS(a)
+#define ASL_CV_TRANSFER_COMMENTS(a)
+#define ASL_CV_CLOSE_PAREN(a,b)          acpi_os_printf (")");
+#define ASL_CV_CLOSE_BRACE(a,b)          acpi_os_printf ("}");
+#define ASL_CV_SWITCH_FILES(a,b)
+#define ASL_CV_CLEAR_OP_COMMENTS(a)
+#define ASL_CV_PRINT_ONE_COMMENT(a,b,c,d)
+#define ASL_CV_PRINT_ONE_COMMENT_LIST(a,b)
+#define ASL_CV_FILE_HAS_SWITCHED(a)       0
+#define ASL_CV_INIT_FILETREE(a,b,c)
+
+#endif
+
 #endif				/* ACMACROS_H */
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index e758f098ff4b..a5d9af758c52 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -90,6 +90,7 @@
 #define ARGP_BUFFER_OP                  ARGP_LIST3 (ARGP_PKGLENGTH,  ARGP_TERMARG,       ARGP_BYTELIST)
 #define ARGP_BYTE_OP                    ARGP_LIST1 (ARGP_BYTEDATA)
 #define ARGP_BYTELIST_OP                ARGP_LIST1 (ARGP_NAMESTRING)
+#define ARGP_COMMENT_OP                 ARGP_LIST2 (ARGP_BYTEDATA,   ARGP_COMMENT)
 #define ARGP_CONCAT_OP                  ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_CONCAT_RES_OP              ARGP_LIST3 (ARGP_TERMARG,    ARGP_TERMARG,       ARGP_TARGET)
 #define ARGP_COND_REF_OF_OP             ARGP_LIST2 (ARGP_SIMPLENAME, ARGP_TARGET)
@@ -223,6 +224,7 @@
 #define ARGI_BUFFER_OP                  ARGI_LIST1 (ARGI_INTEGER)
 #define ARGI_BYTE_OP                    ARGI_INVALID_OPCODE
 #define ARGI_BYTELIST_OP                ARGI_INVALID_OPCODE
+#define ARGI_COMMENT_OP                 ARGI_INVALID_OPCODE
 #define ARGI_CONCAT_OP                  ARGI_LIST3 (ARGI_ANYTYPE,    ARGI_ANYTYPE,       ARGI_TARGETREF)
 #define ARGI_CONCAT_RES_OP              ARGI_LIST3 (ARGI_BUFFER,     ARGI_BUFFER,        ARGI_TARGETREF)
 #define ARGI_COND_REF_OF_OP             ARGI_LIST2 (ARGI_OBJECT_REF, ARGI_TARGETREF)
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index b536fd471292..176f7e9b4d0e 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -48,11 +48,8 @@
 
 /* primary opcodes */
 
-#define AML_NULL_CHAR               (u16) 0x00
-
 #define AML_ZERO_OP                 (u16) 0x00
 #define AML_ONE_OP                  (u16) 0x01
-#define AML_UNASSIGNED              (u16) 0x02
 #define AML_ALIAS_OP                (u16) 0x06
 #define AML_NAME_OP                 (u16) 0x08
 #define AML_BYTE_OP                 (u16) 0x0a
@@ -63,17 +60,15 @@
 #define AML_SCOPE_OP                (u16) 0x10
 #define AML_BUFFER_OP               (u16) 0x11
 #define AML_PACKAGE_OP              (u16) 0x12
-#define AML_VAR_PACKAGE_OP          (u16) 0x13	/* ACPI 2.0 */
+#define AML_VARIABLE_PACKAGE_OP     (u16) 0x13	/* ACPI 2.0 */
 #define AML_METHOD_OP               (u16) 0x14
 #define AML_EXTERNAL_OP             (u16) 0x15	/* ACPI 6.0 */
 #define AML_DUAL_NAME_PREFIX        (u16) 0x2e
-#define AML_MULTI_NAME_PREFIX_OP    (u16) 0x2f
-#define AML_NAME_CHAR_SUBSEQ        (u16) 0x30
-#define AML_NAME_CHAR_FIRST         (u16) 0x41
-#define AML_EXTENDED_OP_PREFIX      (u16) 0x5b
+#define AML_MULTI_NAME_PREFIX       (u16) 0x2f
+#define AML_EXTENDED_PREFIX         (u16) 0x5b
 #define AML_ROOT_PREFIX             (u16) 0x5c
 #define AML_PARENT_PREFIX           (u16) 0x5e
-#define AML_LOCAL_OP                (u16) 0x60
+#define AML_FIRST_LOCAL_OP          (u16) 0x60	/* Used for Local op # calculations */
 #define AML_LOCAL0                  (u16) 0x60
 #define AML_LOCAL1                  (u16) 0x61
 #define AML_LOCAL2                  (u16) 0x62
@@ -82,7 +77,7 @@
 #define AML_LOCAL5                  (u16) 0x65
 #define AML_LOCAL6                  (u16) 0x66
 #define AML_LOCAL7                  (u16) 0x67
-#define AML_ARG_OP                  (u16) 0x68
+#define AML_FIRST_ARG_OP            (u16) 0x68	/* Used for Arg op # calculations */
 #define AML_ARG0                    (u16) 0x68
 #define AML_ARG1                    (u16) 0x69
 #define AML_ARG2                    (u16) 0x6a
@@ -93,7 +88,7 @@
 #define AML_STORE_OP                (u16) 0x70
 #define AML_REF_OF_OP               (u16) 0x71
 #define AML_ADD_OP                  (u16) 0x72
-#define AML_CONCAT_OP               (u16) 0x73
+#define AML_CONCATENATE_OP          (u16) 0x73
 #define AML_SUBTRACT_OP             (u16) 0x74
 #define AML_INCREMENT_OP            (u16) 0x75
 #define AML_DECREMENT_OP            (u16) 0x76
@@ -110,7 +105,7 @@
 #define AML_FIND_SET_LEFT_BIT_OP    (u16) 0x81
 #define AML_FIND_SET_RIGHT_BIT_OP   (u16) 0x82
 #define AML_DEREF_OF_OP             (u16) 0x83
-#define AML_CONCAT_RES_OP           (u16) 0x84	/* ACPI 2.0 */
+#define AML_CONCATENATE_TEMPLATE_OP (u16) 0x84	/* ACPI 2.0 */
 #define AML_MOD_OP                  (u16) 0x85	/* ACPI 2.0 */
 #define AML_NOTIFY_OP               (u16) 0x86
 #define AML_SIZE_OF_OP              (u16) 0x87
@@ -122,18 +117,18 @@
 #define AML_CREATE_BIT_FIELD_OP     (u16) 0x8d
 #define AML_OBJECT_TYPE_OP          (u16) 0x8e
 #define AML_CREATE_QWORD_FIELD_OP   (u16) 0x8f	/* ACPI 2.0 */
-#define AML_LAND_OP                 (u16) 0x90
-#define AML_LOR_OP                  (u16) 0x91
-#define AML_LNOT_OP                 (u16) 0x92
-#define AML_LEQUAL_OP               (u16) 0x93
-#define AML_LGREATER_OP             (u16) 0x94
-#define AML_LLESS_OP                (u16) 0x95
+#define AML_LOGICAL_AND_OP          (u16) 0x90
+#define AML_LOGICAL_OR_OP           (u16) 0x91
+#define AML_LOGICAL_NOT_OP          (u16) 0x92
+#define AML_LOGICAL_EQUAL_OP        (u16) 0x93
+#define AML_LOGICAL_GREATER_OP      (u16) 0x94
+#define AML_LOGICAL_LESS_OP         (u16) 0x95
 #define AML_TO_BUFFER_OP            (u16) 0x96	/* ACPI 2.0 */
-#define AML_TO_DECSTRING_OP         (u16) 0x97	/* ACPI 2.0 */
-#define AML_TO_HEXSTRING_OP         (u16) 0x98	/* ACPI 2.0 */
+#define AML_TO_DECIMAL_STRING_OP    (u16) 0x97	/* ACPI 2.0 */
+#define AML_TO_HEX_STRING_OP        (u16) 0x98	/* ACPI 2.0 */
 #define AML_TO_INTEGER_OP           (u16) 0x99	/* ACPI 2.0 */
 #define AML_TO_STRING_OP            (u16) 0x9c	/* ACPI 2.0 */
-#define AML_COPY_OP                 (u16) 0x9d	/* ACPI 2.0 */
+#define AML_COPY_OBJECT_OP          (u16) 0x9d	/* ACPI 2.0 */
 #define AML_MID_OP                  (u16) 0x9e	/* ACPI 2.0 */
 #define AML_CONTINUE_OP             (u16) 0x9f	/* ACPI 2.0 */
 #define AML_IF_OP                   (u16) 0xa0
@@ -142,18 +137,27 @@
 #define AML_NOOP_OP                 (u16) 0xa3
 #define AML_RETURN_OP               (u16) 0xa4
 #define AML_BREAK_OP                (u16) 0xa5
-#define AML_BREAK_POINT_OP          (u16) 0xcc
+#define AML_COMMENT_OP              (u16) 0xa9
+#define AML_BREAKPOINT_OP          (u16) 0xcc
 #define AML_ONES_OP                 (u16) 0xff
 
-/* prefixed opcodes */
+/*
+ * Combination opcodes (actually two one-byte opcodes)
+ * Used by the disassembler and iASL compiler
+ */
+#define AML_LOGICAL_GREATER_EQUAL_OP (u16) 0x9295	/* LNot (LLess) */
+#define AML_LOGICAL_LESS_EQUAL_OP    (u16) 0x9294	/* LNot (LGreater) */
+#define AML_LOGICAL_NOT_EQUAL_OP     (u16) 0x9293	/* LNot (LEqual) */
+
+/* Prefixed (2-byte) opcodes (with AML_EXTENDED_PREFIX) */
 
-#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* prefix for 2-byte opcodes */
+#define AML_EXTENDED_OPCODE         (u16) 0x5b00	/* Prefix for 2-byte opcodes */
 
 #define AML_MUTEX_OP                (u16) 0x5b01
 #define AML_EVENT_OP                (u16) 0x5b02
-#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10
-#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11
-#define AML_COND_REF_OF_OP          (u16) 0x5b12
+#define AML_SHIFT_RIGHT_BIT_OP      (u16) 0x5b10	/* Obsolete, not in ACPI spec */
+#define AML_SHIFT_LEFT_BIT_OP       (u16) 0x5b11	/* Obsolete, not in ACPI spec */
+#define AML_CONDITIONAL_REF_OF_OP   (u16) 0x5b12
 #define AML_CREATE_FIELD_OP         (u16) 0x5b13
 #define AML_LOAD_TABLE_OP           (u16) 0x5b1f	/* ACPI 2.0 */
 #define AML_LOAD_OP                 (u16) 0x5b20
@@ -175,21 +179,13 @@
 #define AML_FIELD_OP                (u16) 0x5b81
 #define AML_DEVICE_OP               (u16) 0x5b82
 #define AML_PROCESSOR_OP            (u16) 0x5b83
-#define AML_POWER_RES_OP            (u16) 0x5b84
+#define AML_POWER_RESOURCE_OP       (u16) 0x5b84
 #define AML_THERMAL_ZONE_OP         (u16) 0x5b85
 #define AML_INDEX_FIELD_OP          (u16) 0x5b86
 #define AML_BANK_FIELD_OP           (u16) 0x5b87
 #define AML_DATA_REGION_OP          (u16) 0x5b88	/* ACPI 2.0 */
 
 /*
- * Combination opcodes (actually two one-byte opcodes)
- * Used by the disassembler and iASL compiler
- */
-#define AML_LGREATEREQUAL_OP        (u16) 0x9295
-#define AML_LLESSEQUAL_OP           (u16) 0x9294
-#define AML_LNOTEQUAL_OP            (u16) 0x9293
-
-/*
  * Opcodes for "Field" operators
  */
 #define AML_FIELD_OFFSET_OP         (u8) 0x00
@@ -241,6 +237,7 @@
 #define ARGP_SIMPLENAME             0x12	/* name_string | local_term | arg_term */
 #define ARGP_NAME_OR_REF            0x13	/* For object_type only */
 #define ARGP_MAX                    0x13
+#define ARGP_COMMENT                0x14
 
 /*
  * Resolved argument types for the AML Interpreter
@@ -308,24 +305,19 @@
 #define ARGI_INVALID_OPCODE         0xFFFFFFFF
 
 /*
- * hash offsets
- */
-#define AML_EXTOP_HASH_OFFSET       22
-#define AML_LNOT_HASH_OFFSET        19
-
-/*
- * opcode groups and types
+ * Some of the flags and types below are of the form:
+ *
+ * AML_FLAGS_EXEC_#A_#T,#R, or
+ * AML_TYPE_EXEC_#A_#T,#R where:
+ *
+ *      #A is the number of required arguments
+ *      #T is the number of target operands
+ *      #R indicates whether there is a return value
  */
-#define OPGRP_NAMED                 0x01
-#define OPGRP_FIELD                 0x02
-#define OPGRP_BYTELIST              0x04
 
 /*
- * Opcode information
+ * Opcode information flags
  */
-
-/* Opcode flags */
-
 #define AML_LOGICAL                 0x0001
 #define AML_LOGICAL_NUMERIC         0x0002
 #define AML_MATH                    0x0004
@@ -342,7 +334,7 @@
 #define AML_CONSTANT                0x2000
 #define AML_NO_OPERAND_RESOLVE      0x4000
 
-/* Convenient flag groupings */
+/* Convenient flag groupings of the flags above */
 
 #define AML_FLAGS_EXEC_0A_0T_1R                                     AML_HAS_RETVAL
 #define AML_FLAGS_EXEC_1A_0T_0R     AML_HAS_ARGS	/* Monadic1  */
@@ -359,7 +351,7 @@
 
 /*
  * The opcode Type is used in a dispatch table, do not change
- * without updating the table.
+ * or add anything new without updating the table.
  */
 #define AML_TYPE_EXEC_0A_0T_1R      0x00
 #define AML_TYPE_EXEC_1A_0T_0R      0x01	/* Monadic1  */
@@ -385,7 +377,7 @@
 
 #define AML_TYPE_METHOD_CALL        0x10
 
-/* Misc */
+/* Miscellaneous types */
 
 #define AML_TYPE_CREATE_FIELD       0x11
 #define AML_TYPE_CREATE_OBJECT      0x12
@@ -395,7 +387,6 @@
 #define AML_TYPE_NAMED_SIMPLE       0x16
 #define AML_TYPE_NAMED_COMPLEX      0x17
 #define AML_TYPE_RETURN             0x18
-
 #define AML_TYPE_UNDEFINED          0x19
 #define AML_TYPE_BOGUS              0x1A
 
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index 15c8237b8a80..df62c9245efc 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -422,6 +422,7 @@ acpi_db_walk_for_execute(acpi_handle obj_handle,
 
 	status = acpi_get_object_info(obj_handle, &obj_info);
 	if (ACPI_FAILURE(status)) {
+		ACPI_FREE(pathname);
 		return (status);
 	}
 
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index 205b8e0eded5..8f665d94b8b5 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "amlcode.h"
 #include "acdebug.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbxface")
@@ -125,7 +126,7 @@ error_exit:
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Called for AML_BREAK_POINT_OP
+ * DESCRIPTION: Called for AML_BREAKPOINT_OP
  *
  ******************************************************************************/
 
@@ -368,7 +369,9 @@ acpi_db_single_step(struct acpi_walk_state *walk_state,
 		walk_state->method_breakpoint = 1;	/* Must be non-zero! */
 	}
 
+	acpi_ex_exit_interpreter();
 	status = acpi_db_start_command(walk_state, op);
+	acpi_ex_enter_interpreter();
 
 	/* User commands complete, continue execution of the interrupted method */
 
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index d31b49feaa79..f470e81b0499 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -347,7 +347,7 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state,
 
 		break;
 
-	case AML_BREAK_POINT_OP:
+	case AML_BREAKPOINT_OP:
 
 		acpi_db_signal_break_point(walk_state);
 
diff --git a/drivers/acpi/acpica/dsmthdat.c b/drivers/acpi/acpica/dsmthdat.c
index adcc72cd53a7..27a7de95f7b0 100644
--- a/drivers/acpi/acpica/dsmthdat.c
+++ b/drivers/acpi/acpica/dsmthdat.c
@@ -672,7 +672,8 @@ acpi_ds_store_object_to_local(u8 type,
  *
  * FUNCTION:    acpi_ds_method_data_get_type
  *
- * PARAMETERS:  opcode              - Either AML_LOCAL_OP or AML_ARG_OP
+ * PARAMETERS:  opcode              - Either AML_FIRST LOCAL_OP or
+ *                                    AML_FIRST_ARG_OP
  *              index               - Which Local or Arg whose type to get
  *              walk_state          - Current walk state object
  *
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 8deaa16493a0..7df3152ed856 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -114,7 +114,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 				    ((op->common.parent->common.aml_opcode ==
 				      AML_PACKAGE_OP)
 				     || (op->common.parent->common.aml_opcode ==
-					 AML_VAR_PACKAGE_OP))) {
+					 AML_VARIABLE_PACKAGE_OP))) {
 					/*
 					 * We didn't find the target and we are populating elements
 					 * of a package - ignore if slack enabled. Some ASL code
@@ -144,7 +144,7 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state,
 
 		if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
 		    (op->common.parent->common.aml_opcode ==
-		     AML_VAR_PACKAGE_OP)) {
+		     AML_VARIABLE_PACKAGE_OP)) {
 			/*
 			 * Attempt to resolve the node to a value before we insert it into
 			 * the package. If this is a reference to a common data type,
@@ -398,7 +398,7 @@ acpi_ds_build_internal_package_obj(struct acpi_walk_state *walk_state,
 
 	parent = op->common.parent;
 	while ((parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	       (parent->common.aml_opcode == AML_VAR_PACKAGE_OP)) {
+	       (parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP)) {
 		parent = parent->common.parent;
 	}
 
@@ -769,10 +769,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 		switch (op_info->type) {
 		case AML_TYPE_LOCAL_VARIABLE:
 
-			/* Local ID (0-7) is (AML opcode - base AML_LOCAL_OP) */
+			/* Local ID (0-7) is (AML opcode - base AML_FIRST_LOCAL_OP) */
 
 			obj_desc->reference.value =
-			    ((u32)opcode) - AML_LOCAL_OP;
+			    ((u32)opcode) - AML_FIRST_LOCAL_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_LOCAL;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
@@ -790,9 +790,10 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state,
 
 		case AML_TYPE_METHOD_ARGUMENT:
 
-			/* Arg ID (0-6) is (AML opcode - base AML_ARG_OP) */
+			/* Arg ID (0-6) is (AML opcode - base AML_FIRST_ARG_OP) */
 
-			obj_desc->reference.value = ((u32)opcode) - AML_ARG_OP;
+			obj_desc->reference.value =
+			    ((u32)opcode) - AML_FIRST_ARG_OP;
 			obj_desc->reference.class = ACPI_REFCLASS_ARG;
 
 #ifndef ACPI_NO_METHOD_EXECUTION
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 148523205d41..9a8f8a992b3e 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -639,7 +639,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		break;
 
 	case AML_PACKAGE_OP:
-	case AML_VAR_PACKAGE_OP:
+	case AML_VARIABLE_PACKAGE_OP:
 
 		status =
 		    acpi_ds_build_internal_package_obj(walk_state, op, length,
@@ -660,7 +660,7 @@ acpi_ds_eval_data_object_operands(struct acpi_walk_state *walk_state,
 		if ((!op->common.parent) ||
 		    ((op->common.parent->common.aml_opcode != AML_PACKAGE_OP) &&
 		     (op->common.parent->common.aml_opcode !=
-		      AML_VAR_PACKAGE_OP)
+		      AML_VARIABLE_PACKAGE_OP)
 		     && (op->common.parent->common.aml_opcode !=
 			 AML_NAME_OP))) {
 			walk_state->result_obj = obj_desc;
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 049fbab4e5a6..406edec20de7 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -275,10 +275,10 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
 		if ((op->common.parent->common.aml_opcode == AML_REGION_OP) ||
 		    (op->common.parent->common.aml_opcode == AML_DATA_REGION_OP)
 		    || (op->common.parent->common.aml_opcode == AML_PACKAGE_OP)
-		    || (op->common.parent->common.aml_opcode ==
-			AML_VAR_PACKAGE_OP)
 		    || (op->common.parent->common.aml_opcode == AML_BUFFER_OP)
 		    || (op->common.parent->common.aml_opcode ==
+			AML_VARIABLE_PACKAGE_OP)
+		    || (op->common.parent->common.aml_opcode ==
 			AML_INT_EVAL_SUBTREE_OP)
 		    || (op->common.parent->common.aml_opcode ==
 			AML_BANK_FIELD_OP)) {
@@ -551,7 +551,7 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state,
 			 */
 			if (status == AE_NOT_FOUND) {
 				if (parent_op->common.aml_opcode ==
-				    AML_COND_REF_OF_OP) {
+				    AML_CONDITIONAL_REF_OF_OP) {
 					/*
 					 * For the Conditional Reference op, it's OK if
 					 * the name is not found;  We just need a way to
@@ -806,7 +806,7 @@ acpi_status acpi_ds_evaluate_name_path(struct acpi_walk_state *walk_state)
 	}
 
 	if ((op->common.parent->common.aml_opcode == AML_PACKAGE_OP) ||
-	    (op->common.parent->common.aml_opcode == AML_VAR_PACKAGE_OP) ||
+	    (op->common.parent->common.aml_opcode == AML_VARIABLE_PACKAGE_OP) ||
 	    (op->common.parent->common.aml_opcode == AML_REF_OF_OP)) {
 
 		/* TBD: Should we specify this feature as a bit of op_info->Flags of these opcodes? */
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 78f8e6a4f72f..a2ff8ad70d58 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -497,7 +497,7 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
 			if ((op->asl.parent) &&
 			    ((op->asl.parent->asl.aml_opcode == AML_PACKAGE_OP)
 			     || (op->asl.parent->asl.aml_opcode ==
-				 AML_VAR_PACKAGE_OP))) {
+				 AML_VARIABLE_PACKAGE_OP))) {
 				ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
 						  "Method Reference in a Package, Op=%p\n",
 						  op));
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 44d4553dfbdd..8d510c7e20c8 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -528,7 +528,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state)
 			status = acpi_ex_create_processor(walk_state);
 			break;
 
-		case AML_POWER_RES_OP:
+		case AML_POWER_RESOURCE_OP:
 
 			status = acpi_ex_create_power_resource(walk_state);
 			break;
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 1a6f59079ea5..f222a80ca38e 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -249,14 +249,14 @@ acpi_ex_do_logical_numeric_op(u16 opcode,
 	ACPI_FUNCTION_TRACE(ex_do_logical_numeric_op);
 
 	switch (opcode) {
-	case AML_LAND_OP:	/* LAnd (Integer0, Integer1) */
+	case AML_LOGICAL_AND_OP:	/* LAnd (Integer0, Integer1) */
 
 		if (integer0 && integer1) {
 			local_result = TRUE;
 		}
 		break;
 
-	case AML_LOR_OP:	/* LOr (Integer0, Integer1) */
+	case AML_LOGICAL_OR_OP:	/* LOr (Integer0, Integer1) */
 
 		if (integer0 || integer1) {
 			local_result = TRUE;
@@ -365,21 +365,21 @@ acpi_ex_do_logical_op(u16 opcode,
 		integer1 = local_operand1->integer.value;
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			if (integer0 == integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (integer0 > integer1) {
 				local_result = TRUE;
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (integer0 < integer1) {
 				local_result = TRUE;
@@ -408,7 +408,7 @@ acpi_ex_do_logical_op(u16 opcode,
 				 (length0 > length1) ? length1 : length0);
 
 		switch (opcode) {
-		case AML_LEQUAL_OP:	/* LEqual (Operand0, Operand1) */
+		case AML_LOGICAL_EQUAL_OP:	/* LEqual (Operand0, Operand1) */
 
 			/* Length and all bytes must be equal */
 
@@ -420,7 +420,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LGREATER_OP:	/* LGreater (Operand0, Operand1) */
+		case AML_LOGICAL_GREATER_OP:	/* LGreater (Operand0, Operand1) */
 
 			if (compare > 0) {
 				local_result = TRUE;
@@ -437,7 +437,7 @@ acpi_ex_do_logical_op(u16 opcode,
 			}
 			break;
 
-		case AML_LLESS_OP:	/* LLess (Operand0, Operand1) */
+		case AML_LOGICAL_LESS_OP:	/* LLess (Operand0, Operand1) */
 
 			if (compare > 0) {
 				goto cleanup;	/* FALSE */
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index ee7b62a86661..caa5ed1f65ec 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -122,7 +122,7 @@ static char *acpi_ex_allocate_name_string(u32 prefix_count, u32 num_name_segs)
 
 		/* Set up multi prefixes   */
 
-		*temp_ptr++ = AML_MULTI_NAME_PREFIX_OP;
+		*temp_ptr++ = AML_MULTI_NAME_PREFIX;
 		*temp_ptr++ = (char)num_name_segs;
 	} else if (2 == num_name_segs) {
 
@@ -342,7 +342,7 @@ acpi_ex_get_name_string(acpi_object_type data_type,
 			}
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			ACPI_DEBUG_PRINT((ACPI_DB_LOAD,
 					  "MultiNamePrefix at %p\n",
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index af73fcde7e5c..e327349675cd 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -274,7 +274,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 	case AML_FIND_SET_RIGHT_BIT_OP:
 	case AML_FROM_BCD_OP:
 	case AML_TO_BCD_OP:
-	case AML_COND_REF_OF_OP:
+	case AML_CONDITIONAL_REF_OF_OP:
 
 		/* Create a return object of type Integer for these opcodes */
 
@@ -405,7 +405,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 			}
 			break;
 
-		case AML_COND_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
+		case AML_CONDITIONAL_REF_OF_OP:	/* cond_ref_of (source_object, Result) */
 			/*
 			 * This op is a little strange because the internal return value is
 			 * different than the return value stored in the result descriptor
@@ -475,14 +475,14 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		/*
 		 * ACPI 2.0 Opcodes
 		 */
-	case AML_COPY_OP:	/* Copy (Source, Target) */
+	case AML_COPY_OBJECT_OP:	/* copy_object (Source, Target) */
 
 		status =
 		    acpi_ut_copy_iobject_to_iobject(operand[0], &return_desc,
 						    walk_state);
 		break;
 
-	case AML_TO_DECSTRING_OP:	/* to_decimal_string (Data, Result) */
+	case AML_TO_DECIMAL_STRING_OP:	/* to_decimal_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -495,7 +495,7 @@ acpi_status acpi_ex_opcode_1A_1T_1R(struct acpi_walk_state *walk_state)
 		}
 		break;
 
-	case AML_TO_HEXSTRING_OP:	/* to_hex_string (Data, Result) */
+	case AML_TO_HEX_STRING_OP:	/* to_hex_string (Data, Result) */
 
 		status =
 		    acpi_ex_convert_to_string(operand[0], &return_desc,
@@ -603,7 +603,7 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 	/* Examine the AML opcode */
 
 	switch (walk_state->opcode) {
-	case AML_LNOT_OP:	/* LNot (Operand) */
+	case AML_LOGICAL_NOT_OP:	/* LNot (Operand) */
 
 		return_desc = acpi_ut_create_integer_object((u64) 0);
 		if (!return_desc) {
@@ -652,9 +652,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state)
 		 * NOTE:  We use LNOT_OP here in order to force resolution of the
 		 * reference operand to an actual integer.
 		 */
-		status =
-		    acpi_ex_resolve_operands(AML_LNOT_OP, &temp_desc,
-					     walk_state);
+		status = acpi_ex_resolve_operands(AML_LOGICAL_NOT_OP,
+						  &temp_desc, walk_state);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"While resolving operands for [%s]",
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 44ecba50c0da..eecb3bff7fd7 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -298,7 +298,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 					NULL, &return_desc->integer.value);
 		break;
 
-	case AML_CONCAT_OP:	/* Concatenate (Data1, Data2, Result) */
+	case AML_CONCATENATE_OP:	/* Concatenate (Data1, Data2, Result) */
 
 		status =
 		    acpi_ex_do_concatenate(operand[0], operand[1], &return_desc,
@@ -343,7 +343,7 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 		       operand[0]->buffer.pointer, length);
 		break;
 
-	case AML_CONCAT_RES_OP:
+	case AML_CONCATENATE_TEMPLATE_OP:
 
 		/* concatenate_res_template (Buffer, Buffer, Result) (ACPI 2.0) */
 
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 31e4df97cbe1..688032b58a21 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -124,8 +124,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:     (M == P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LEQUAL_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_EQUAL_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -137,8 +137,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                  (M >= P[i]) (M not_less than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
@@ -151,7 +151,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:         (M > P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -164,7 +164,7 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:                     (M <= P[i]) (M not_greater than P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LGREATER_OP, match_obj,
+		    acpi_ex_do_logical_op(AML_LOGICAL_GREATER_OP, match_obj,
 					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
@@ -178,8 +178,8 @@ acpi_ex_do_match(u32 match_op,
 		 * Change to:            (M < P[i])
 		 */
 		status =
-		    acpi_ex_do_logical_op(AML_LLESS_OP, match_obj, package_obj,
-					  &logical_result);
+		    acpi_ex_do_logical_op(AML_LOGICAL_LESS_OP, match_obj,
+					  package_obj, &logical_result);
 		if (ACPI_FAILURE(status)) {
 			return (FALSE);
 		}
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 7fecefc2e1b4..aa8c6fd74cc3 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -196,7 +196,8 @@ acpi_ex_resolve_object_to_value(union acpi_operand_object **stack_ptr,
 
 				if ((walk_state->opcode ==
 				     AML_INT_METHODCALL_OP)
-				    || (walk_state->opcode == AML_COPY_OP)) {
+				    || (walk_state->opcode ==
+					AML_COPY_OBJECT_OP)) {
 					break;
 				}
 
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index a2f8001aeb86..bdd43cde8f36 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -416,7 +416,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 
 	/* Only limited target types possible for everything except copy_object */
 
-	if (walk_state->opcode != AML_COPY_OP) {
+	if (walk_state->opcode != AML_COPY_OBJECT_OP) {
 		/*
 		 * Only copy_object allows all object types to be overwritten. For
 		 * target_ref(s), there are restrictions on the object types that
@@ -499,7 +499,8 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 	case ACPI_TYPE_STRING:
 	case ACPI_TYPE_BUFFER:
 
-		if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) {
+		if ((walk_state->opcode == AML_COPY_OBJECT_OP) ||
+		    !implicit_conversion) {
 			/*
 			 * However, copy_object and Stores to arg_x do not perform
 			 * an implicit conversion, as per the ACPI specification.
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index 85db4716a043..56f59cf5da29 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -107,7 +107,7 @@ acpi_ex_resolve_object(union acpi_operand_object **source_desc_ptr,
 
 		/* For copy_object, no further validation necessary */
 
-		if (walk_state->opcode == AML_COPY_OP) {
+		if (walk_state->opcode == AML_COPY_OBJECT_OP) {
 			break;
 		}
 
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index 531620abed80..3094cec4eab4 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -102,7 +102,7 @@ static const struct acpi_port_info acpi_protected_ports[] = {
 	{"PCI", 0x0CF8, 0x0CFF, ACPI_OSI_WIN_XP}
 };
 
-#define ACPI_PORT_INFO_ENTRIES  ACPI_ARRAY_LENGTH (acpi_protected_ports)
+#define ACPI_PORT_INFO_ENTRIES      ACPI_ARRAY_LENGTH (acpi_protected_ports)
 
 /******************************************************************************
  *
@@ -128,7 +128,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	acpi_io_address last_address;
 	const struct acpi_port_info *port_info;
 
-	ACPI_FUNCTION_TRACE(hw_validate_io_request);
+	ACPI_FUNCTION_NAME(hw_validate_io_request);
 
 	/* Supported widths are 8/16/32 */
 
@@ -153,13 +153,13 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		ACPI_ERROR((AE_INFO,
 			    "Illegal I/O port address/length above 64K: %8.8X%8.8X/0x%X",
 			    ACPI_FORMAT_UINT64(address), byte_width));
-		return_ACPI_STATUS(AE_LIMIT);
+		return (AE_LIMIT);
 	}
 
 	/* Exit if requested address is not within the protected port table */
 
 	if (address > acpi_protected_ports[ACPI_PORT_INFO_ENTRIES - 1].end) {
-		return_ACPI_STATUS(AE_OK);
+		return (AE_OK);
 	}
 
 	/* Check request against the list of protected I/O ports */
@@ -167,7 +167,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 	for (i = 0; i < ACPI_PORT_INFO_ENTRIES; i++, port_info++) {
 		/*
 		 * Check if the requested address range will write to a reserved
-		 * port. Four cases to consider:
+		 * port. There are four cases to consider:
 		 *
 		 * 1) Address range is contained completely in the port address range
 		 * 2) Address range overlaps port range at the port range start
@@ -198,7 +198,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
 		}
 	}
 
-	return_ACPI_STATUS(AE_OK);
+	return (AE_OK);
 }
 
 /******************************************************************************
@@ -206,7 +206,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width)
  * FUNCTION:    acpi_hw_read_port
  *
  * PARAMETERS:  Address             Address of I/O port/register to read
- *              Value               Where value is placed
+ *              Value               Where value (data) is returned
  *              Width               Number of bits
  *
  * RETURN:      Status and value read from port
@@ -244,7 +244,7 @@ acpi_status acpi_hw_read_port(acpi_io_address address, u32 *value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0, *value = 0; i < width; i += 8) {
 
@@ -307,7 +307,7 @@ acpi_status acpi_hw_write_port(acpi_io_address address, u32 value, u32 width)
 	/*
 	 * There has been a protection violation within the request. Fall
 	 * back to byte granularity port I/O and ignore the failing bytes.
-	 * This provides Windows compatibility.
+	 * This provides compatibility with other ACPI implementations.
 	 */
 	for (i = 0; i < width; i += 8) {
 
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 498bb8f70e6b..fb265b5737de 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -485,7 +485,7 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
 					  flags));
 			break;
 
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* More than one name_seg, search rules do not apply */
 
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 38316266521e..418ef2ac82ab 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -290,22 +290,12 @@ object_repaired:
 	/* Object was successfully repaired */
 
 	if (package_index != ACPI_NOT_PACKAGE_ELEMENT) {
-		/*
-		 * The original object is a package element. We need to
-		 * decrement the reference count of the original object,
-		 * for removing it from the package.
-		 *
-		 * However, if the original object was just wrapped with a
-		 * package object as part of the repair, we don't need to
-		 * change the reference count.
-		 */
+
+		/* Update reference count of new object */
+
 		if (!(info->return_flags & ACPI_OBJECT_WRAPPED)) {
 			new_object->common.reference_count =
 			    return_object->common.reference_count;
-
-			if (return_object->common.reference_count > 1) {
-				return_object->common.reference_count--;
-			}
 		}
 
 		ACPI_DEBUG_PRINT((ACPI_DB_REPAIR,
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 352265498e90..06037e044694 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -403,16 +403,12 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info,
 			return (status);
 		}
 
-		/* Take care with reference counts */
-
 		if (original_element != *element_ptr) {
 
-			/* Element was replaced */
+			/* Update reference count of new object */
 
 			(*element_ptr)->common.reference_count =
 			    original_ref_count;
-
-			acpi_ut_remove_reference(original_element);
 		}
 
 		element_ptr++;
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 661676714f7b..2fe87d0dd9d5 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -252,7 +252,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[1] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[2];
 		} else {
-			internal_name[1] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[1] = AML_MULTI_NAME_PREFIX;
 			internal_name[2] = (char)num_segments;
 			result = &internal_name[3];
 		}
@@ -274,7 +274,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info)
 			internal_name[i] = AML_DUAL_NAME_PREFIX;
 			result = &internal_name[(acpi_size)i + 1];
 		} else {
-			internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
+			internal_name[i] = AML_MULTI_NAME_PREFIX;
 			internal_name[(acpi_size)i + 1] = (char)num_segments;
 			result = &internal_name[(acpi_size)i + 2];
 		}
@@ -450,7 +450,7 @@ acpi_ns_externalize_name(u32 internal_name_length,
 	 */
 	if (prefix_length < internal_name_length) {
 		switch (internal_name[prefix_length]) {
-		case AML_MULTI_NAME_PREFIX_OP:
+		case AML_MULTI_NAME_PREFIX:
 
 			/* <count> 4-byte names */
 
@@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle)
 void acpi_ns_terminate(void)
 {
 	acpi_status status;
+	union acpi_operand_object *prev;
+	union acpi_operand_object *next;
 
 	ACPI_FUNCTION_TRACE(ns_terminate);
 
-#ifdef ACPI_EXEC_APP
-	{
-		union acpi_operand_object *prev;
-		union acpi_operand_object *next;
+	/* Delete any module-level code blocks */
 
-		/* Delete any module-level code blocks */
-
-		next = acpi_gbl_module_code_list;
-		while (next) {
-			prev = next;
-			next = next->method.mutex;
-			prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
-			acpi_ut_remove_reference(prev);
-		}
+	next = acpi_gbl_module_code_list;
+	while (next) {
+		prev = next;
+		next = next->method.mutex;
+		prev->method.mutex = NULL;	/* Clear the Mutex (cheated) field */
+		acpi_ut_remove_reference(prev);
 	}
-#endif
 
 	/*
 	 * Free the entire namespace -- all nodes and all objects
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 05b62ad44c3e..eb9dfaca555f 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -47,6 +47,7 @@
 #include "amlcode.h"
 #include "acnamesp.h"
 #include "acdispat.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psargs")
@@ -186,7 +187,7 @@ char *acpi_ps_get_next_namestring(struct acpi_parse_state *parser_state)
 		end += 1 + (2 * ACPI_NAME_SIZE);
 		break;
 
-	case AML_MULTI_NAME_PREFIX_OP:
+	case AML_MULTI_NAME_PREFIX:
 
 		/* Multiple name segments, 4 chars each, count in next byte */
 
@@ -339,7 +340,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 		/* 2) not_found during a cond_ref_of(x) is ok by definition */
 
 		else if (walk_state->op->common.aml_opcode ==
-			 AML_COND_REF_OF_OP) {
+			 AML_CONDITIONAL_REF_OF_OP) {
 			status = AE_OK;
 		}
 
@@ -352,7 +353,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
 			 ((arg->common.parent->common.aml_opcode ==
 			   AML_PACKAGE_OP)
 			  || (arg->common.parent->common.aml_opcode ==
-			      AML_VAR_PACKAGE_OP))) {
+			      AML_VARIABLE_PACKAGE_OP))) {
 			status = AE_OK;
 		}
 	}
@@ -502,6 +503,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	ACPI_FUNCTION_TRACE(ps_get_next_field);
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	aml = parser_state->aml;
 
 	/* Determine field type */
@@ -546,6 +548,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 	/* Decode the field type */
 
+	ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 	switch (opcode) {
 	case AML_INT_NAMEDFIELD_OP:
 
@@ -555,6 +558,22 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		acpi_ps_set_name(field, name);
 		parser_state->aml += ACPI_NAME_SIZE;
 
+		ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
+
+#ifdef ACPI_ASL_COMPILER
+		/*
+		 * Because the package length isn't represented as a parse tree object,
+		 * take comments surrounding this and add to the previously created
+		 * parse node.
+		 */
+		if (field->common.inline_comment) {
+			field->common.name_comment =
+			    field->common.inline_comment;
+		}
+		field->common.inline_comment = acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+#endif
+
 		/* Get the length which is encoded as a package length */
 
 		field->common.value.size =
@@ -609,11 +628,13 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 		if (ACPI_GET8(parser_state->aml) == AML_BUFFER_OP) {
 			parser_state->aml++;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			pkg_end = parser_state->aml;
 			pkg_length =
 			    acpi_ps_get_next_package_length(parser_state);
 			pkg_end += pkg_length;
 
+			ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 			if (parser_state->aml < pkg_end) {
 
 				/* Non-empty list */
@@ -630,6 +651,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 				opcode = ACPI_GET8(parser_state->aml);
 				parser_state->aml++;
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				switch (opcode) {
 				case AML_BYTE_OP:	/* AML_BYTEDATA_ARG */
 
@@ -660,6 +682,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
 
 				/* Fill in bytelist data */
 
+				ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state);
 				arg->named.value.size = buffer_length;
 				arg->named.data = parser_state->aml;
 			}
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 14d689606d2f..b4224005783c 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -55,6 +55,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psloop")
@@ -132,6 +133,21 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 		       !walk_state->arg_count) {
 			walk_state->aml = walk_state->parser_state.aml;
 
+			switch (op->common.aml_opcode) {
+			case AML_METHOD_OP:
+			case AML_BUFFER_OP:
+			case AML_PACKAGE_OP:
+			case AML_VARIABLE_PACKAGE_OP:
+			case AML_WHILE_OP:
+
+				break;
+
+			default:
+
+				ASL_CV_CAPTURE_COMMENTS(walk_state);
+				break;
+			}
+
 			status =
 			    acpi_ps_get_next_arg(walk_state,
 						 &(walk_state->parser_state),
@@ -254,7 +270,7 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 
 		case AML_BUFFER_OP:
 		case AML_PACKAGE_OP:
-		case AML_VAR_PACKAGE_OP:
+		case AML_VARIABLE_PACKAGE_OP:
 
 			if ((op->common.parent) &&
 			    (op->common.parent->common.aml_opcode ==
@@ -480,6 +496,8 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 	/* Iterative parsing loop, while there is more AML to process: */
 
 	while ((parser_state->aml < parser_state->aml_end) || (op)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
+
 		aml_op_start = parser_state->aml;
 		if (!op) {
 			status =
@@ -516,6 +534,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 		 */
 		walk_state->arg_count = 0;
 
+		switch (op->common.aml_opcode) {
+		case AML_BYTE_OP:
+		case AML_WORD_OP:
+		case AML_DWORD_OP:
+		case AML_QWORD_OP:
+
+			break;
+
+		default:
+
+			ASL_CV_CAPTURE_COMMENTS(walk_state);
+			break;
+		}
+
 		/* Are there any arguments that must be processed? */
 
 		if (walk_state->arg_types) {
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index 5c4aff0f4f26..5bcb61831706 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psobject")
@@ -190,6 +191,7 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 	 */
 	while (GET_CURRENT_ARG_TYPE(walk_state->arg_types) &&
 	       (GET_CURRENT_ARG_TYPE(walk_state->arg_types) != ARGP_NAME)) {
+		ASL_CV_CAPTURE_COMMENTS(walk_state);
 		status =
 		    acpi_ps_get_next_arg(walk_state,
 					 &(walk_state->parser_state),
@@ -203,6 +205,18 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 		INCREMENT_ARG_LIST(walk_state->arg_types);
 	}
 
+	/* are there any inline comments associated with the name_seg?? If so, save this. */
+
+	ASL_CV_CAPTURE_COMMENTS(walk_state);
+
+#ifdef ACPI_ASL_COMPILER
+	if (acpi_gbl_current_inline_comment != NULL) {
+		unnamed_op->common.name_comment =
+		    acpi_gbl_current_inline_comment;
+		acpi_gbl_current_inline_comment = NULL;
+	}
+#endif
+
 	/*
 	 * Make sure that we found a NAME and didn't run out of arguments
 	 */
@@ -243,6 +257,30 @@ acpi_ps_build_named_op(struct acpi_walk_state *walk_state,
 
 	acpi_ps_append_arg(*op, unnamed_op->common.value.arg);
 
+#ifdef ACPI_ASL_COMPILER
+
+	/* save any comments that might be associated with unnamed_op. */
+
+	(*op)->common.inline_comment = unnamed_op->common.inline_comment;
+	(*op)->common.end_node_comment = unnamed_op->common.end_node_comment;
+	(*op)->common.close_brace_comment =
+	    unnamed_op->common.close_brace_comment;
+	(*op)->common.name_comment = unnamed_op->common.name_comment;
+	(*op)->common.comment_list = unnamed_op->common.comment_list;
+	(*op)->common.end_blk_comment = unnamed_op->common.end_blk_comment;
+	(*op)->common.cv_filename = unnamed_op->common.cv_filename;
+	(*op)->common.cv_parent_filename =
+	    unnamed_op->common.cv_parent_filename;
+	(*op)->named.aml = unnamed_op->common.aml;
+
+	unnamed_op->common.inline_comment = NULL;
+	unnamed_op->common.end_node_comment = NULL;
+	unnamed_op->common.close_brace_comment = NULL;
+	unnamed_op->common.name_comment = NULL;
+	unnamed_op->common.comment_list = NULL;
+	unnamed_op->common.end_blk_comment = NULL;
+#endif
+
 	if ((*op)->common.aml_opcode == AML_REGION_OP ||
 	    (*op)->common.aml_opcode == AML_DATA_REGION_OP) {
 		/*
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 451b672915f1..c343a0d5a3d2 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -69,7 +69,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -95,7 +95,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_FIELD_OP
 	AML_INDEX_FIELD_OP
@@ -113,7 +113,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -136,7 +136,7 @@ ACPI_MODULE_NAME("psopcode")
 	AML_DEVICE_OP
 	AML_THERMAL_ZONE_OP
 	AML_METHOD_OP
-	AML_POWER_RES_OP
+	AML_POWER_RESOURCE_OP
 	AML_PROCESSOR_OP
 	AML_NAME_OP
 	AML_ALIAS_OP
@@ -149,7 +149,7 @@ ACPI_MODULE_NAME("psopcode")
   must be deferred until needed
 
 	AML_METHOD_OP
-	AML_VAR_PACKAGE_OP
+	AML_VARIABLE_PACKAGE_OP
 	AML_CREATE_FIELD_OP
 	AML_CREATE_BIT_FIELD_OP
 	AML_CREATE_BYTE_FIELD_OP
@@ -652,7 +652,10 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
 
 	/* 81 */ ACPI_OP("External", ARGP_EXTERNAL_OP, ARGI_EXTERNAL_OP,
 			 ACPI_TYPE_ANY, AML_CLASS_EXECUTE, /* ? */
-			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R)
+			 AML_TYPE_EXEC_3A_0T_0R, AML_FLAGS_EXEC_3A_0T_0R),
+/* 82 */ ACPI_OP("Comment", ARGP_COMMENT_OP, ARGI_COMMENT_OP,
+			 ACPI_TYPE_STRING, AML_CLASS_ARGUMENT,
+			 AML_TYPE_LITERAL, AML_CONSTANT)
 
 /*! [End] no source code translation !*/
 };
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index 89f95b7f26e9..eff22950232b 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -226,7 +226,7 @@ const u8 acpi_gbl_short_op_index[256] = {
 /* 0x90 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x73, 0x74,
 /* 0x98 */ 0x75, 0x76, _UNK, _UNK, 0x77, 0x78, 0x79, 0x7A,
 /* 0xA0 */ 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x60, 0x61,
-/* 0xA8 */ 0x62, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
+/* 0xA8 */ 0x62, 0x82, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xB8 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
 /* 0xC0 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index a813bbbd5a8b..8116a670de39 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -105,7 +105,7 @@ u16 acpi_ps_peek_opcode(struct acpi_parse_state * parser_state)
 	aml = parser_state->aml;
 	opcode = (u16) ACPI_GET8(aml);
 
-	if (opcode == AML_EXTENDED_OP_PREFIX) {
+	if (opcode == AML_EXTENDED_PREFIX) {
 
 		/* Extended opcode, get the second opcode byte */
 
@@ -210,7 +210,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 			    || (op->common.parent->common.aml_opcode ==
 				AML_BANK_FIELD_OP)
 			    || (op->common.parent->common.aml_opcode ==
-				AML_VAR_PACKAGE_OP)) {
+				AML_VARIABLE_PACKAGE_OP)) {
 				replacement_op =
 				    acpi_ps_alloc_op(AML_INT_RETURN_VALUE_OP,
 						     op->common.aml);
@@ -225,7 +225,7 @@ acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
 				if ((op->common.aml_opcode == AML_BUFFER_OP)
 				    || (op->common.aml_opcode == AML_PACKAGE_OP)
 				    || (op->common.aml_opcode ==
-					AML_VAR_PACKAGE_OP)) {
+					AML_VARIABLE_PACKAGE_OP)) {
 					replacement_op =
 					    acpi_ps_alloc_op(op->common.
 							     aml_opcode,
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 9677fff8fd47..c06d6e2fc7a5 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("pstree")
@@ -216,6 +217,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = acpi_ps_get_arg(op, 0);
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -223,6 +225,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 	next = op->common.next;
 	if (next) {
+		ASL_CV_LABEL_FILENODE(next);
 		return (next);
 	}
 
@@ -233,6 +236,8 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 	while (parent) {
 		arg = acpi_ps_get_arg(parent, 0);
 		while (arg && (arg != origin) && (arg != op)) {
+
+			ASL_CV_LABEL_FILENODE(arg);
 			arg = arg->common.next;
 		}
 
@@ -247,6 +252,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 
 			/* Found sibling of parent */
 
+			ASL_CV_LABEL_FILENODE(parent->common.next);
 			return (parent->common.next);
 		}
 
@@ -254,6 +260,7 @@ union acpi_parse_object *acpi_ps_get_depth_next(union acpi_parse_object *origin,
 		parent = parent->common.parent;
 	}
 
+	ASL_CV_LABEL_FILENODE(next);
 	return (next);
 }
 
@@ -296,7 +303,7 @@ union acpi_parse_object *acpi_ps_get_child(union acpi_parse_object *op)
 		child = acpi_ps_get_arg(op, 1);
 		break;
 
-	case AML_POWER_RES_OP:
+	case AML_POWER_RESOURCE_OP:
 	case AML_INDEX_FIELD_OP:
 
 		child = acpi_ps_get_arg(op, 2);
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index 2fa38bb76a55..02642760cb93 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acparser.h"
 #include "amlcode.h"
+#include "acconvert.h"
 
 #define _COMPONENT          ACPI_PARSER
 ACPI_MODULE_NAME("psutils")
@@ -152,6 +153,15 @@ union acpi_parse_object *acpi_ps_alloc_op(u16 opcode, u8 *aml)
 		acpi_ps_init_op(op, opcode);
 		op->common.aml = aml;
 		op->common.flags = flags;
+		ASL_CV_CLEAR_OP_COMMENTS(op);
+
+		if (opcode == AML_SCOPE_OP) {
+			acpi_gbl_current_scope = op;
+		}
+	}
+
+	if (gbl_capture_comments) {
+		ASL_CV_TRANSFER_COMMENTS(op);
 	}
 
 	return (op);
@@ -174,6 +184,7 @@ void acpi_ps_free_op(union acpi_parse_object *op)
 {
 	ACPI_FUNCTION_NAME(ps_free_op);
 
+	ASL_CV_CLEAR_OP_COMMENTS(op);
 	if (op->common.aml_opcode == AML_INT_RETURN_VALUE_OP) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
 				  "Free retval op: %p\n", op));
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index a3401bd29413..5594a359dbf1 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -142,6 +142,45 @@ acpi_status acpi_ut_create_caches(void)
 	if (ACPI_FAILURE(status)) {
 		return (status);
 	}
+#ifdef ACPI_ASL_COMPILER
+	/*
+	 * For use with the ASL-/ASL+ option. This cache keeps track of regular
+	 * 0xA9 0x01 comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment",
+				 sizeof(struct acpi_comment_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_reg_comment_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache keeps track of the starting addresses of where the comments
+	 * lie. This helps prevent duplication of comments.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-Comment-Addr",
+				 sizeof(struct acpi_comment_addr_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_comment_addr_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	/*
+	 * This cache will be used for nodes that represent files.
+	 */
+	status =
+	    acpi_os_create_cache("Acpi-File", sizeof(struct acpi_file_node),
+				 ACPI_MAX_COMMENT_CACHE_DEPTH,
+				 &acpi_gbl_file_cache);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Memory allocation lists */
@@ -201,6 +240,17 @@ acpi_status acpi_ut_delete_caches(void)
 	(void)acpi_os_delete_cache(acpi_gbl_ps_node_ext_cache);
 	acpi_gbl_ps_node_ext_cache = NULL;
 
+#ifdef ACPI_ASL_COMPILER
+	(void)acpi_os_delete_cache(acpi_gbl_reg_comment_cache);
+	acpi_gbl_reg_comment_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_comment_addr_cache);
+	acpi_gbl_comment_addr_cache = NULL;
+
+	(void)acpi_os_delete_cache(acpi_gbl_file_cache);
+	acpi_gbl_file_cache = NULL;
+#endif
+
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
 
 	/* Debug only - display leftover memory allocation, if any */
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 11c7f72f2d56..531493306dee 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -71,7 +71,7 @@ acpi_os_create_cache(char *cache_name,
 
 	ACPI_FUNCTION_ENTRY();
 
-	if (!cache_name || !return_cache || (object_size < 16)) {
+	if (!cache_name || !return_cache || !object_size) {
 		return (AE_BAD_PARAMETER);
 	}
 
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index bd5ea3101eb7..615a885e2ca3 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -627,4 +627,5 @@ acpi_trace_point(acpi_trace_event_type type, u8 begin, u8 *aml, char *pathname)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_trace_point)
+
 #endif
diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c
index ff096d9755b9..e0587c85bafd 100644
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -474,6 +474,15 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
 			}
 
+			/*
+			 * The end_tag opcode must be followed by a zero byte.
+			 * Although this byte is technically defined to be a checksum,
+			 * in practice, all ASL compilers set this byte to zero.
+			 */
+			if (*(aml + 1) != 0) {
+				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
+			}
+
 			/* Return the pointer to the end_tag if requested */
 
 			if (!user_function) {
diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c
index a16bd9eac653..950a1e500bfa 100644
--- a/drivers/acpi/acpica/utxferror.c
+++ b/drivers/acpi/acpica/utxferror.c
@@ -91,7 +91,7 @@ ACPI_EXPORT_SYMBOL(acpi_error)
  *
  * PARAMETERS:  module_name         - Caller's module name (for error output)
  *              line_number         - Caller's line number (for error output)
- *              status              - Status to be formatted
+ *              status              - Status value to be decoded/formatted
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -132,8 +132,8 @@ ACPI_EXPORT_SYMBOL(acpi_exception)
  *
  * FUNCTION:    acpi_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
@@ -163,17 +163,13 @@ ACPI_EXPORT_SYMBOL(acpi_warning)
  *
  * FUNCTION:    acpi_info
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
- *              format              - Printf format string + additional args
+ * PARAMETERS:  format              - Printf format string + additional args
  *
  * RETURN:      None
  *
  * DESCRIPTION: Print generic "ACPI:" information message. There is no
  *              module/line/version info in order to keep the message simple.
  *
- * TBD: module_name and line_number args are not needed, should be removed.
- *
  ******************************************************************************/
 void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...)
 {
@@ -229,8 +225,8 @@ ACPI_EXPORT_SYMBOL(acpi_bios_error)
  *
  * FUNCTION:    acpi_bios_warning
  *
- * PARAMETERS:  module_name         - Caller's module name (for error output)
- *              line_number         - Caller's line number (for error output)
+ * PARAMETERS:  module_name         - Caller's module name (for warning output)
+ *              line_number         - Caller's line number (for warning output)
  *              format              - Printf format string + additional args
  *
  * RETURN:      None
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 4ef1e4624b2b..d42eeef9d928 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -67,6 +67,7 @@ MODULE_DESCRIPTION("ACPI Battery Driver");
 MODULE_LICENSE("GPL");
 
 static async_cookie_t async_cookie;
+static bool battery_driver_registered;
 static int battery_bix_broken_package;
 static int battery_notification_delay_ms;
 static unsigned int cache_time = 1000;
@@ -93,6 +94,11 @@ static const struct acpi_device_id battery_device_ids[] = {
 
 MODULE_DEVICE_TABLE(acpi, battery_device_ids);
 
+/* Lists of PMIC ACPI HIDs with an (often better) native battery driver */
+static const char * const acpi_battery_blacklist[] = {
+	"INT33F4", /* X-Powers AXP288 PMIC */
+};
+
 enum {
 	ACPI_BATTERY_ALARM_PRESENT,
 	ACPI_BATTERY_XINFO_PRESENT,
@@ -1315,8 +1321,17 @@ static struct acpi_driver acpi_battery_driver = {
 
 static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie)
 {
+	unsigned int i;
 	int result;
 
+	for (i = 0; i < ARRAY_SIZE(acpi_battery_blacklist); i++)
+		if (acpi_dev_present(acpi_battery_blacklist[i], "1", -1)) {
+			pr_info(PREFIX ACPI_BATTERY_DEVICE_NAME
+				": found native %s PMIC, not loading\n",
+				acpi_battery_blacklist[i]);
+			return;
+		}
+
 	dmi_check_system(bat_dmi_table);
 
 #ifdef CONFIG_ACPI_PROCFS_POWER
@@ -1329,6 +1344,7 @@ static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie)
 	if (result < 0)
 		acpi_unlock_battery_dir(acpi_battery_dir);
 #endif
+	battery_driver_registered = (result == 0);
 }
 
 static int __init acpi_battery_init(void)
@@ -1343,9 +1359,11 @@ static int __init acpi_battery_init(void)
 static void __exit acpi_battery_exit(void)
 {
 	async_synchronize_cookie(async_cookie + 1);
-	acpi_bus_unregister_driver(&acpi_battery_driver);
+	if (battery_driver_registered)
+		acpi_bus_unregister_driver(&acpi_battery_driver);
 #ifdef CONFIG_ACPI_PROCFS_POWER
-	acpi_unlock_battery_dir(acpi_battery_dir);
+	if (acpi_battery_dir)
+		acpi_unlock_battery_dir(acpi_battery_dir);
 #endif
 }
 
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 4421f7c9981c..bb542acc0574 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -188,6 +188,14 @@ static struct dmi_system_id acpi_rev_dmi_table[] __initdata = {
 		      DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 3350"),
 		},
 	},
+	{
+	 .callback = dmi_enable_rev_override,
+	 .ident = "DELL Inspiron 7537",
+	 .matches = {
+		      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		      DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"),
+		},
+	},
 #endif
 	{}
 };
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 34fbe027e73a..784bda663d16 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -114,6 +114,11 @@ int acpi_bus_get_status(struct acpi_device *device)
 	acpi_status status;
 	unsigned long long sta;
 
+	if (acpi_device_always_present(device)) {
+		acpi_set_device_status(device, ACPI_STA_DEFAULT);
+		return 0;
+	}
+
 	status = acpi_bus_get_status_handle(device->handle, &sta);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3ca0729f7e0e..6cbe6036da99 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -132,49 +132,54 @@ __ATTR(_name, 0444, show_##_name, NULL)
 
 #define to_cpc_desc(a) container_of(a, struct cpc_desc, kobj)
 
+#define show_cppc_data(access_fn, struct_name, member_name)		\
+	static ssize_t show_##member_name(struct kobject *kobj,		\
+					struct attribute *attr,	char *buf) \
+	{								\
+		struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);		\
+		struct struct_name st_name = {0};			\
+		int ret;						\
+									\
+		ret = access_fn(cpc_ptr->cpu_id, &st_name);		\
+		if (ret)						\
+			return ret;					\
+									\
+		return scnprintf(buf, PAGE_SIZE, "%llu\n",		\
+				(u64)st_name.member_name);		\
+	}								\
+	define_one_cppc_ro(member_name)
+
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, highest_perf);
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf);
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf);
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf);
+show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf);
+show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time);
+
 static ssize_t show_feedback_ctrs(struct kobject *kobj,
 		struct attribute *attr, char *buf)
 {
 	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
 	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+	int ret;
 
-	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+	ret = cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+	if (ret)
+		return ret;
 
 	return scnprintf(buf, PAGE_SIZE, "ref:%llu del:%llu\n",
 			fb_ctrs.reference, fb_ctrs.delivered);
 }
 define_one_cppc_ro(feedback_ctrs);
 
-static ssize_t show_reference_perf(struct kobject *kobj,
-		struct attribute *attr, char *buf)
-{
-	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
-	struct cppc_perf_fb_ctrs fb_ctrs = {0};
-
-	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
-
-	return scnprintf(buf, PAGE_SIZE, "%llu\n",
-			fb_ctrs.reference_perf);
-}
-define_one_cppc_ro(reference_perf);
-
-static ssize_t show_wraparound_time(struct kobject *kobj,
-				struct attribute *attr, char *buf)
-{
-	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
-	struct cppc_perf_fb_ctrs fb_ctrs = {0};
-
-	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
-
-	return scnprintf(buf, PAGE_SIZE, "%llu\n", fb_ctrs.ctr_wrap_time);
-
-}
-define_one_cppc_ro(wraparound_time);
-
 static struct attribute *cppc_attrs[] = {
 	&feedback_ctrs.attr,
 	&reference_perf.attr,
 	&wraparound_time.attr,
+	&highest_perf.attr,
+	&lowest_perf.attr,
+	&lowest_nonlinear_perf.attr,
+	&nominal_perf.attr,
 	NULL
 };
 
@@ -972,9 +977,9 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-	struct cpc_register_resource *highest_reg, *lowest_reg, *ref_perf,
-								 *nom_perf;
-	u64 high, low, nom;
+	struct cpc_register_resource *highest_reg, *lowest_reg,
+		*lowest_non_linear_reg, *nominal_reg;
+	u64 high, low, nom, min_nonlinear;
 	int ret = 0, regs_in_pcc = 0;
 
 	if (!cpc_desc) {
@@ -984,12 +989,12 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 
 	highest_reg = &cpc_desc->cpc_regs[HIGHEST_PERF];
 	lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF];
-	ref_perf = &cpc_desc->cpc_regs[REFERENCE_PERF];
-	nom_perf = &cpc_desc->cpc_regs[NOMINAL_PERF];
+	lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF];
+	nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF];
 
 	/* Are any of the regs PCC ?*/
 	if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
-		CPC_IN_PCC(ref_perf) || CPC_IN_PCC(nom_perf)) {
+		CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg)) {
 		regs_in_pcc = 1;
 		down_write(&pcc_data.pcc_lock);
 		/* Ring doorbell once to update PCC subspace */
@@ -1005,10 +1010,13 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 	cpc_read(cpunum, lowest_reg, &low);
 	perf_caps->lowest_perf = low;
 
-	cpc_read(cpunum, nom_perf, &nom);
+	cpc_read(cpunum, nominal_reg, &nom);
 	perf_caps->nominal_perf = nom;
 
-	if (!high || !low || !nom)
+	cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear);
+	perf_caps->lowest_nonlinear_perf = min_nonlinear;
+
+	if (!high || !low || !nom || !min_nonlinear)
 		ret = -EFAULT;
 
 out_err:
@@ -1083,7 +1091,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 	perf_fb_ctrs->delivered = delivered;
 	perf_fb_ctrs->reference = reference;
 	perf_fb_ctrs->reference_perf = ref_perf;
-	perf_fb_ctrs->ctr_wrap_time = ctr_wrap_time;
+	perf_fb_ctrs->wraparound_time = ctr_wrap_time;
 out_err:
 	if (regs_in_pcc)
 		up_write(&pcc_data.pcc_lock);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f15900132912..66229ffa909b 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -65,8 +65,6 @@ static inline void acpi_cmos_rtc_init(void) {}
 #endif
 int acpi_rev_override_setup(char *str);
 
-extern bool acpi_force_hot_remove;
-
 void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug,
 				    const char *name);
 int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler,
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index fcd4ce6f78d5..3a6c9b741b23 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state)
 		return -EINVAL;
 
 	/* The state of the list is 'on' IFF all resources are 'on'. */
+	cur_state = 0;
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
 		acpi_handle handle = resource->device.handle;
@@ -863,6 +864,16 @@ void acpi_resume_power_resources(void)
 
 		mutex_unlock(&resource->resource_lock);
 	}
+
+	mutex_unlock(&power_resource_list_lock);
+}
+
+void acpi_turn_off_unused_power_resources(void)
+{
+	struct acpi_power_resource *resource;
+
+	mutex_lock(&power_resource_list_lock);
+
 	list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
 		int result, state;
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2433569b02ef..c26931067415 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -30,12 +30,6 @@ extern struct acpi_device *acpi_root;
 
 #define INVALID_ACPI_HANDLE	((acpi_handle)empty_zero_page)
 
-/*
- * If set, devices will be hot-removed even if they cannot be put offline
- * gracefully (from the kernel's standpoint).
- */
-bool acpi_force_hot_remove;
-
 static const char *dummy_hid = "device";
 
 static LIST_HEAD(acpi_dep_list);
@@ -170,9 +164,6 @@ static acpi_status acpi_bus_offline(acpi_handle handle, u32 lvl, void *data,
 			pn->put_online = false;
 		}
 		ret = device_offline(pn->dev);
-		if (acpi_force_hot_remove)
-			continue;
-
 		if (ret >= 0) {
 			pn->put_online = !ret;
 		} else {
@@ -241,11 +232,11 @@ static int acpi_scan_try_to_offline(struct acpi_device *device)
 		acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
 				    NULL, acpi_bus_offline, (void *)true,
 				    (void **)&errdev);
-		if (!errdev || acpi_force_hot_remove)
+		if (!errdev)
 			acpi_bus_offline(handle, 0, (void *)true,
 					 (void **)&errdev);
 
-		if (errdev && !acpi_force_hot_remove) {
+		if (errdev) {
 			dev_warn(errdev, "Offline failed.\n");
 			acpi_bus_online(handle, 0, NULL, NULL);
 			acpi_walk_namespace(ACPI_TYPE_ANY, handle,
@@ -263,8 +254,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
 	unsigned long long sta;
 	acpi_status status;
 
-	if (device->handler && device->handler->hotplug.demand_offline
-	    && !acpi_force_hot_remove) {
+	if (device->handler && device->handler->hotplug.demand_offline) {
 		if (!acpi_scan_is_offline(device, true))
 			return -EBUSY;
 	} else {
@@ -1850,6 +1840,8 @@ static void acpi_bus_attach(struct acpi_device *device)
 			device->flags.power_manageable = 0;
 
 		device->flags.initialized = true;
+	} else if (device->flags.visited) {
+		goto ok;
 	}
 
 	ret = acpi_scan_attach_handler(device);
@@ -1866,10 +1858,10 @@ static void acpi_bus_attach(struct acpi_device *device)
 	if (ret < 0)
 		return;
 
-	if (ret > 0 || !device->pnp.type.platform_id)
-		acpi_device_set_enumerated(device);
-	else
+	if (device->pnp.type.platform_id)
 		acpi_default_enumeration(device);
+	else
+		acpi_device_set_enumerated(device);
 
  ok:
 	list_for_each_entry(child, &device->children, node)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a4327af676fe..097d630ab886 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -474,6 +474,7 @@ static void acpi_pm_start(u32 acpi_state)
  */
 static void acpi_pm_end(void)
 {
+	acpi_turn_off_unused_power_resources();
 	acpi_scan_lock_release();
 	/*
 	 * This is necessary in case acpi_pm_finish() is not called during a
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index a9cc34e663f9..a82ff74faf7a 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -6,6 +6,7 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
+extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index cf05ae973381..1b5ee1e0e5a3 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -921,7 +921,7 @@ void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug,
 static ssize_t force_remove_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", !!acpi_force_hot_remove);
+	return sprintf(buf, "%d\n", 0);
 }
 
 static ssize_t force_remove_store(struct kobject *kobj,
@@ -935,9 +935,10 @@ static ssize_t force_remove_store(struct kobject *kobj,
 	if (ret < 0)
 		return ret;
 
-	lock_device_hotplug();
-	acpi_force_hot_remove = val;
-	unlock_device_hotplug();
+	if (val) {
+		pr_err("Enabling force_remove is not supported anymore. Please report to linux-acpi@vger.kernel.org if you depend on this functionality\n");
+		return -EINVAL;
+	}
 	return size;
 }
 
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2604189d6cd1..0dae722ab2ec 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -311,22 +311,6 @@ acpi_parse_entries_array(char *id, unsigned long table_size,
 }
 
 int __init
-acpi_parse_entries(char *id,
-			unsigned long table_size,
-			acpi_tbl_entry_handler handler,
-			struct acpi_table_header *table_header,
-			int entry_id, unsigned int max_entries)
-{
-	struct acpi_subtable_proc proc = {
-		.id		= entry_id,
-		.handler	= handler,
-	};
-
-	return acpi_parse_entries_array(id, table_size, table_header,
-			&proc, 1, max_entries);
-}
-
-int __init
 acpi_table_parse_entries_array(char *id,
 			 unsigned long table_size,
 			 struct acpi_subtable_proc *proc, int proc_num,
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 22c09952e177..27d0dcfcf47d 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -736,6 +736,72 @@ bool acpi_dev_found(const char *hid)
 }
 EXPORT_SYMBOL(acpi_dev_found);
 
+struct acpi_dev_present_info {
+	struct acpi_device_id hid[2];
+	const char *uid;
+	s64 hrv;
+};
+
+static int acpi_dev_present_cb(struct device *dev, void *data)
+{
+	struct acpi_device *adev = to_acpi_device(dev);
+	struct acpi_dev_present_info *match = data;
+	unsigned long long hrv;
+	acpi_status status;
+
+	if (acpi_match_device_ids(adev, match->hid))
+		return 0;
+
+	if (match->uid && (!adev->pnp.unique_id ||
+	    strcmp(adev->pnp.unique_id, match->uid)))
+		return 0;
+
+	if (match->hrv == -1)
+		return 1;
+
+	status = acpi_evaluate_integer(adev->handle, "_HRV", NULL, &hrv);
+	if (ACPI_FAILURE(status))
+		return 0;
+
+	return hrv == match->hrv;
+}
+
+/**
+ * acpi_dev_present - Detect that a given ACPI device is present
+ * @hid: Hardware ID of the device.
+ * @uid: Unique ID of the device, pass NULL to not check _UID
+ * @hrv: Hardware Revision of the device, pass -1 to not check _HRV
+ *
+ * Return %true if a matching device was present at the moment of invocation.
+ * Note that if the device is pluggable, it may since have disappeared.
+ *
+ * Note that unlike acpi_dev_found() this function checks the status
+ * of the device. So for devices which are present in the dsdt, but
+ * which are disabled (their _STA callback returns 0) this function
+ * will return false.
+ *
+ * For this function to work, acpi_bus_scan() must have been executed
+ * which happens in the subsys_initcall() subsection. Hence, do not
+ * call from a subsys_initcall() or earlier (use acpi_get_devices()
+ * instead). Calling from module_init() is fine (which is synonymous
+ * with device_initcall()).
+ */
+bool acpi_dev_present(const char *hid, const char *uid, s64 hrv)
+{
+	struct acpi_dev_present_info match = {};
+	struct device *dev;
+
+	strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id));
+	match.uid = uid;
+	match.hrv = hrv;
+
+	dev = bus_find_device(&acpi_bus_type, NULL, &match,
+			      acpi_dev_present_cb);
+
+	return !!dev;
+}
+EXPORT_SYMBOL(acpi_dev_present);
+
 /*
  * acpi_backlight= handling, this is done here rather then in video_detect.c
  * because __setup cannot be used in modules.
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
new file mode 100644
index 000000000000..bd86b809c848
--- /dev/null
+++ b/drivers/acpi/x86/utils.c
@@ -0,0 +1,90 @@
+/*
+ * X86 ACPI Utility Functions
+ *
+ * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
+ *
+ * Based on various non upstream patches to support the CHT Whiskey Cove PMIC:
+ * Copyright (C) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include "../internal.h"
+
+/*
+ * Some ACPI devices are hidden (status == 0x0) in recent BIOS-es because
+ * some recent Windows drivers bind to one device but poke at multiple
+ * devices at the same time, so the others get hidden.
+ * We work around this by always reporting ACPI_STA_DEFAULT for these
+ * devices. Note this MUST only be done for devices where this is safe.
+ *
+ * This forcing of devices to be present is limited to specific CPU (SoC)
+ * models both to avoid potentially causing trouble on other models and
+ * because some HIDs are re-used on different SoCs for completely
+ * different devices.
+ */
+struct always_present_id {
+	struct acpi_device_id hid[2];
+	struct x86_cpu_id cpu_ids[2];
+	const char *uid;
+};
+
+#define ICPU(model)	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
+
+#define ENTRY(hid, uid, cpu_models) {					\
+	{ { hid, }, {} },						\
+	{ cpu_models, {} },						\
+	uid,								\
+}
+
+static const struct always_present_id always_present_ids[] = {
+	/*
+	 * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
+	 * but Linux uses a separate PWM driver, harmless if not used.
+	 */
+	ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1)),
+	ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+	/*
+	 * The INT0002 device is necessary to clear wakeup interrupt sources
+	 * on Cherry Trail devices, without it we get nobody cared IRQ msgs.
+	 */
+	ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT)),
+};
+
+bool acpi_device_always_present(struct acpi_device *adev)
+{
+	u32 *status = (u32 *)&adev->status;
+	u32 old_status = *status;
+	bool ret = false;
+	unsigned int i;
+
+	/* acpi_match_device_ids checks status, so set it to default */
+	*status = ACPI_STA_DEFAULT;
+	for (i = 0; i < ARRAY_SIZE(always_present_ids); i++) {
+		if (acpi_match_device_ids(adev, always_present_ids[i].hid))
+			continue;
+
+		if (!adev->pnp.unique_id ||
+		    strcmp(adev->pnp.unique_id, always_present_ids[i].uid))
+			continue;
+
+		if (!x86_match_cpu(always_present_ids[i].cpu_ids))
+			continue;
+
+		if (old_status != ACPI_STA_DEFAULT) /* Log only once */
+			dev_info(&adev->dev,
+				 "Device [%s] is in always present list\n",
+				 adev->pnp.bus_id);
+
+		ret = true;
+		break;
+	}
+	*status = old_status;
+
+	return ret;
+}
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 70b57d2229d6..ff6cb9e4c381 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -14,7 +14,6 @@ menuconfig ATA
 	tristate "Serial ATA and Parallel ATA drivers (libata)"
 	depends on HAS_IOMEM
 	depends on BLOCK
-	depends on !(M32R || S390) || BROKEN
 	select SCSI
 	select GLOB
 	---help---
@@ -118,6 +117,15 @@ config AHCI_DA850
 
 	  If unsure, say N.
 
+config AHCI_DM816
+	tristate "DaVinci DM816 AHCI SATA support"
+	depends on ARCH_OMAP2PLUS
+	help
+	  This option enables support for the DaVinci DM816 SoC's
+	  onboard AHCI SATA controller.
+
+	  If unsure, say N.
+
 config AHCI_ST
 	tristate "ST AHCI SATA support"
 	depends on ARCH_STI
@@ -885,14 +893,6 @@ config PATA_AT32
 
 	  If unsure, say N.
 
-config PATA_AT91
-	tristate "PATA support for AT91SAM9260"
-	depends on ARM && SOC_AT91SAM9
-	help
-	  This option enables support for IDE devices on the Atmel AT91SAM9260 SoC.
-
-	  If unsure, say N.
-
 config PATA_CMD640_PCI
 	tristate "CMD640 PCI PATA support (Experimental)"
 	depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 89a0a1915d36..3048cc100a46 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_SATA_HIGHBANK)	+= sata_highbank.o libahci.o
 obj-$(CONFIG_AHCI_BRCM)		+= ahci_brcm.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_CEVA)		+= ahci_ceva.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_DA850)	+= ahci_da850.o libahci.o libahci_platform.o
+obj-$(CONFIG_AHCI_DM816)	+= ahci_dm816.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_IMX)		+= ahci_imx.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_MVEBU)	+= ahci_mvebu.o libahci.o libahci_platform.o
 obj-$(CONFIG_AHCI_OCTEON)	+= ahci_octeon.o
@@ -91,7 +92,6 @@ obj-$(CONFIG_PATA_WINBOND)	+= pata_sl82c105.o
 
 # SFF PIO only
 obj-$(CONFIG_PATA_AT32)		+= pata_at32.o
-obj-$(CONFIG_PATA_AT91)		+= pata_at91.o
 obj-$(CONFIG_PATA_CMD640_PCI)	+= pata_cmd640.o
 obj-$(CONFIG_PATA_FALCON)	+= pata_falcon.o
 obj-$(CONFIG_PATA_ISAPNP)	+= pata_isapnp.o
diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c
new file mode 100644
index 000000000000..fbd827c3a75c
--- /dev/null
+++ b/drivers/ata/ahci_dm816.c
@@ -0,0 +1,200 @@
+/*
+ * DaVinci DM816 AHCI SATA platform driver
+ *
+ * Copyright (C) 2017 BayLibre SAS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/libata.h>
+#include <linux/ahci_platform.h>
+
+#include "ahci.h"
+
+#define AHCI_DM816_DRV_NAME		"ahci-dm816"
+
+#define AHCI_DM816_PHY_ENPLL(x)		((x) << 0)
+#define AHCI_DM816_PHY_MPY(x)		((x) << 1)
+#define AHCI_DM816_PHY_LOS(x)		((x) << 12)
+#define AHCI_DM816_PHY_RXCDR(x)		((x) << 13)
+#define AHCI_DM816_PHY_RXEQ(x)		((x) << 16)
+#define AHCI_DM816_PHY_TXSWING(x)	((x) << 23)
+
+#define AHCI_DM816_P0PHYCR_REG		0x178
+#define AHCI_DM816_P1PHYCR_REG		0x1f8
+
+#define AHCI_DM816_PLL_OUT		1500000000LU
+
+static const unsigned long pll_mpy_table[] = {
+	  400,  500,  600,  800,  825, 1000, 1200,
+	 1250, 1500, 1600, 1650, 2000, 2200, 2500
+};
+
+static int ahci_dm816_get_mpy_bits(unsigned long refclk_rate)
+{
+	unsigned long pll_multiplier;
+	int i;
+
+	/*
+	 * We need to determine the value of the multiplier (MPY) bits.
+	 * In order to include the 8.25 multiplier we need to first divide
+	 * the refclk rate by 100.
+	 */
+	pll_multiplier = AHCI_DM816_PLL_OUT / (refclk_rate / 100);
+
+	for (i = 0; i < ARRAY_SIZE(pll_mpy_table); i++) {
+		if (pll_mpy_table[i] == pll_multiplier)
+			return i;
+	}
+
+	/*
+	 * We should have divided evenly - if not, return an invalid
+	 * value.
+	 */
+	return -1;
+}
+
+static int ahci_dm816_phy_init(struct ahci_host_priv *hpriv, struct device *dev)
+{
+	unsigned long refclk_rate;
+	int mpy;
+	u32 val;
+
+	/*
+	 * We should have been supplied two clocks: the functional and
+	 * keep-alive clock and the external reference clock. We need the
+	 * rate of the latter to calculate the correct value of MPY bits.
+	 */
+	if (!hpriv->clks[1]) {
+		dev_err(dev, "reference clock not supplied\n");
+		return -EINVAL;
+	}
+
+	refclk_rate = clk_get_rate(hpriv->clks[1]);
+	if ((refclk_rate % 100) != 0) {
+		dev_err(dev, "reference clock rate must be divisible by 100\n");
+		return -EINVAL;
+	}
+
+	mpy = ahci_dm816_get_mpy_bits(refclk_rate);
+	if (mpy < 0) {
+		dev_err(dev, "can't calculate the MPY bits value\n");
+		return -EINVAL;
+	}
+
+	/* Enable the PHY and configure the first HBA port. */
+	val = AHCI_DM816_PHY_MPY(mpy) | AHCI_DM816_PHY_LOS(1) |
+	      AHCI_DM816_PHY_RXCDR(4) | AHCI_DM816_PHY_RXEQ(1) |
+	      AHCI_DM816_PHY_TXSWING(3) | AHCI_DM816_PHY_ENPLL(1);
+	writel(val, hpriv->mmio + AHCI_DM816_P0PHYCR_REG);
+
+	/* Configure the second HBA port. */
+	val = AHCI_DM816_PHY_LOS(1) | AHCI_DM816_PHY_RXCDR(4) |
+	      AHCI_DM816_PHY_RXEQ(1) | AHCI_DM816_PHY_TXSWING(3);
+	writel(val, hpriv->mmio + AHCI_DM816_P1PHYCR_REG);
+
+	return 0;
+}
+
+static int ahci_dm816_softreset(struct ata_link *link,
+				unsigned int *class, unsigned long deadline)
+{
+	int pmp, ret;
+
+	pmp = sata_srst_pmp(link);
+
+	/*
+	 * There's an issue with the SATA controller on DM816 SoC: if we
+	 * enable Port Multiplier support, but the drive is connected directly
+	 * to the board, it can't be detected. As a workaround: if PMP is
+	 * enabled, we first call ahci_do_softreset() and pass it the result of
+	 * sata_srst_pmp(). If this call fails, we retry with pmp = 0.
+	 */
+	ret = ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready);
+	if (pmp && ret == -EBUSY)
+		return ahci_do_softreset(link, class, 0,
+					 deadline, ahci_check_ready);
+
+	return ret;
+}
+
+static struct ata_port_operations ahci_dm816_port_ops = {
+	.inherits = &ahci_platform_ops,
+	.softreset = ahci_dm816_softreset,
+};
+
+static const struct ata_port_info ahci_dm816_port_info = {
+	.flags		= AHCI_FLAG_COMMON,
+	.pio_mask	= ATA_PIO4,
+	.udma_mask	= ATA_UDMA6,
+	.port_ops	= &ahci_dm816_port_ops,
+};
+
+static struct scsi_host_template ahci_dm816_platform_sht = {
+	AHCI_SHT(AHCI_DM816_DRV_NAME),
+};
+
+static int ahci_dm816_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ahci_host_priv *hpriv;
+	int rc;
+
+	hpriv = ahci_platform_get_resources(pdev);
+	if (IS_ERR(hpriv))
+		return PTR_ERR(hpriv);
+
+	rc = ahci_platform_enable_resources(hpriv);
+	if (rc)
+		return rc;
+
+	rc = ahci_dm816_phy_init(hpriv, dev);
+	if (rc)
+		goto disable_resources;
+
+	rc = ahci_platform_init_host(pdev, hpriv,
+				     &ahci_dm816_port_info,
+				     &ahci_dm816_platform_sht);
+	if (rc)
+		goto disable_resources;
+
+	return 0;
+
+disable_resources:
+	ahci_platform_disable_resources(hpriv);
+
+	return rc;
+}
+
+static SIMPLE_DEV_PM_OPS(ahci_dm816_pm_ops,
+			 ahci_platform_suspend,
+			 ahci_platform_resume);
+
+static const struct of_device_id ahci_dm816_of_match[] = {
+	{ .compatible = "ti,dm816-ahci", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ahci_dm816_of_match);
+
+static struct platform_driver ahci_dm816_driver = {
+	.probe = ahci_dm816_probe,
+	.remove = ata_platform_remove_one,
+	.driver = {
+		.name = AHCI_DM816_DRV_NAME,
+		.of_match_table = ahci_dm816_of_match,
+		.pm = &ahci_dm816_pm_ops,
+	},
+};
+module_platform_driver(ahci_dm816_driver);
+
+MODULE_DESCRIPTION("DaVinci DM816 AHCI SATA platform driver");
+MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/ata/ahci_octeon.c b/drivers/ata/ahci_octeon.c
index ea865fe953e1..5a44e089c6bb 100644
--- a/drivers/ata/ahci_octeon.c
+++ b/drivers/ata/ahci_octeon.c
@@ -38,11 +38,6 @@ static int ahci_octeon_probe(struct platform_device *pdev)
 	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "Platform resource[0] is missing\n");
-		return -ENODEV;
-	}
-
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index ca75823697dd..2d83b8c75965 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4910,7 +4910,7 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
  *	LOCKING:
  *	spin_lock_irqsave(host lock)
  */
-void ata_sg_clean(struct ata_queued_cmd *qc)
+static void ata_sg_clean(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct scatterlist *sg = qc->sg;
@@ -5902,9 +5902,9 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
 	init_completion(&ap->park_req_pending);
-	init_timer_deferrable(&ap->fastdrain_timer);
-	ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn;
-	ap->fastdrain_timer.data = (unsigned long)ap;
+	setup_deferrable_timer(&ap->fastdrain_timer,
+			       ata_eh_fastdrain_timerfn,
+			       (unsigned long)ap);
 
 	ap->cbl = ATA_CBL_NONE;
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1ac70744ae7b..49ba9834c715 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3393,46 +3393,6 @@ static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax,
 }
 
 /**
- * ata_format_dsm_trim_descr() - SATL Write Same to ATA SCT Write Same
- * @cmd: SCSI command being translated
- * @lba: Starting sector
- * @num: Number of sectors to be zero'd.
- *
- * Rewrite the WRITE SAME payload to be an SCT Write Same formatted
- * descriptor.
- * NOTE: Writes a pattern (0's) in the foreground.
- *
- * Return: Number of bytes copied into sglist.
- */
-static size_t ata_format_sct_write_same(struct scsi_cmnd *cmd, u64 lba, u64 num)
-{
-	struct scsi_device *sdp = cmd->device;
-	size_t len = sdp->sector_size;
-	size_t r;
-	u16 *buf;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
-	buf = ((void *)ata_scsi_rbuf);
-
-	put_unaligned_le16(0x0002,  &buf[0]); /* SCT_ACT_WRITE_SAME */
-	put_unaligned_le16(0x0101,  &buf[1]); /* WRITE PTRN FG */
-	put_unaligned_le64(lba,     &buf[2]);
-	put_unaligned_le64(num,     &buf[6]);
-	put_unaligned_le32(0u,      &buf[10]); /* pattern */
-
-	WARN_ON(len > ATA_SCSI_RBUF_SIZE);
-
-	if (len > ATA_SCSI_RBUF_SIZE)
-		len = ATA_SCSI_RBUF_SIZE;
-
-	r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
-	spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
-
-	return r;
-}
-
-/**
  * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same
  * @qc: Command to be translated
  *
@@ -3462,32 +3422,31 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	if (unlikely(!dev->dma_mode))
 		goto invalid_opcode;
 
+	/*
+	 * We only allow sending this command through the block layer,
+	 * as it modifies the DATA OUT buffer, which would corrupt user
+	 * memory for SG_IO commands.
+	 */
+	if (unlikely(blk_rq_is_passthrough(scmd->request)))
+		goto invalid_opcode;
+
 	if (unlikely(scmd->cmd_len < 16)) {
 		fp = 15;
 		goto invalid_fld;
 	}
 	scsi_16_lba_len(cdb, &block, &n_block);
 
-	if (unmap) {
-		/* If trim is not enabled the cmd is invalid. */
-		if ((dev->horkage & ATA_HORKAGE_NOTRIM) ||
-		    !ata_id_has_trim(dev->id)) {
-			fp = 1;
-			bp = 3;
-			goto invalid_fld;
-		}
-		/* If the request is too large the cmd is invalid */
-		if (n_block > 0xffff * trmax) {
-			fp = 2;
-			goto invalid_fld;
-		}
-	} else {
-		/* If write same is not available the cmd is invalid */
-		if (!ata_id_sct_write_same(dev->id)) {
-			fp = 1;
-			bp = 3;
-			goto invalid_fld;
-		}
+	if (!unmap ||
+	    (dev->horkage & ATA_HORKAGE_NOTRIM) ||
+	    !ata_id_has_trim(dev->id)) {
+		fp = 1;
+		bp = 3;
+		goto invalid_fld;
+	}
+	/* If the request is too large the cmd is invalid */
+	if (n_block > 0xffff * trmax) {
+		fp = 2;
+		goto invalid_fld;
 	}
 
 	/*
@@ -3502,49 +3461,28 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	 * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count)
 	 * is defined as number of 512 byte blocks to be transferred.
 	 */
-	if (unmap) {
-		size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
-		if (size != len)
-			goto invalid_param_len;
 
-		if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
-			/* Newer devices support queued TRIM commands */
-			tf->protocol = ATA_PROT_NCQ;
-			tf->command = ATA_CMD_FPDMA_SEND;
-			tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
-			tf->nsect = qc->tag << 3;
-			tf->hob_feature = (size / 512) >> 8;
-			tf->feature = size / 512;
+	size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
+	if (size != len)
+		goto invalid_param_len;
 
-			tf->auxiliary = 1;
-		} else {
-			tf->protocol = ATA_PROT_DMA;
-			tf->hob_feature = 0;
-			tf->feature = ATA_DSM_TRIM;
-			tf->hob_nsect = (size / 512) >> 8;
-			tf->nsect = size / 512;
-			tf->command = ATA_CMD_DSM;
-		}
-	} else {
-		size = ata_format_sct_write_same(scmd, block, n_block);
-		if (size != len)
-			goto invalid_param_len;
+	if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
+		/* Newer devices support queued TRIM commands */
+		tf->protocol = ATA_PROT_NCQ;
+		tf->command = ATA_CMD_FPDMA_SEND;
+		tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
+		tf->nsect = qc->tag << 3;
+		tf->hob_feature = (size / 512) >> 8;
+		tf->feature = size / 512;
 
-		tf->hob_feature = 0;
-		tf->feature = 0;
-		tf->hob_nsect = 0;
-		tf->nsect = 1;
-		tf->lbah = 0;
-		tf->lbam = 0;
-		tf->lbal = ATA_CMD_STANDBYNOW1;
-		tf->hob_lbah = 0;
-		tf->hob_lbam = 0;
-		tf->hob_lbal = 0;
-		tf->device = ATA_CMD_STANDBYNOW1;
+		tf->auxiliary = 1;
+	} else {
 		tf->protocol = ATA_PROT_DMA;
-		tf->command = ATA_CMD_WRITE_LOG_DMA_EXT;
-		if (unlikely(dev->flags & ATA_DFLAG_PIO))
-			tf->command = ATA_CMD_WRITE_LOG_EXT;
+		tf->hob_feature = 0;
+		tf->feature = ATA_DSM_TRIM;
+		tf->hob_nsect = (size / 512) >> 8;
+		tf->nsect = size / 512;
+		tf->command = ATA_CMD_DSM;
 	}
 
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
@@ -3619,10 +3557,6 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
 	case START_STOP:
 		supported = 3;
 		break;
-	case WRITE_SAME_16:
-		if (!ata_id_sct_write_same(dev->id))
-			break;
-		/* fallthrough: if SCT ... only enable for ZBC */
 	case ZBC_IN:
 	case ZBC_OUT:
 		if (ata_id_zoned_cap(dev->id) ||
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
deleted file mode 100644
index fd5b34f0d007..000000000000
--- a/drivers/ata/pata_at91.c
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * PATA driver for AT91SAM9260 Static Memory Controller
- * with CompactFlash interface in True IDE mode
- *
- * Copyright (C) 2009 Matyukevich Sergey
- *               2011 Igor Plyatov
- *
- * Based on:
- *      * generic platform driver by Paul Mundt: drivers/ata/pata_platform.c
- *      * pata_at32 driver by Kristoffer Nyborg Gregertsen
- *      * at91_ide driver by Stanislaw Gruszka
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/blkdev.h>
-#include <linux/gfp.h>
-#include <scsi/scsi_host.h>
-#include <linux/ata.h>
-#include <linux/clk.h>
-#include <linux/libata.h>
-#include <linux/mfd/syscon.h>
-#include <linux/mfd/syscon/atmel-smc.h>
-#include <linux/platform_device.h>
-#include <linux/ata_platform.h>
-#include <linux/platform_data/atmel.h>
-#include <linux/regmap.h>
-#include <linux/gpio.h>
-
-#define DRV_NAME		"pata_at91"
-#define DRV_VERSION		"0.3"
-
-#define CF_IDE_OFFSET		0x00c00000
-#define CF_ALT_IDE_OFFSET	0x00e00000
-#define CF_IDE_RES_SIZE		0x08
-#define CS_PULSE_MAXIMUM	319
-#define ER_SMC_CALC		1
-#define ER_SMC_RECALC		2
-
-struct at91_ide_info {
-	unsigned long mode;
-	unsigned int cs;
-	struct clk *mck;
-	void __iomem *ide_addr;
-	void __iomem *alt_addr;
-};
-
-/**
- * struct smc_range - range of valid values for SMC register.
- */
-struct smc_range {
-	int min;
-	int max;
-};
-
-struct regmap *smc;
-
-struct at91sam9_smc_generic_fields {
-	struct regmap_field *setup;
-	struct regmap_field *pulse;
-	struct regmap_field *cycle;
-	struct regmap_field *mode;
-} fields;
-
-/**
- * adjust_smc_value - adjust value for one of SMC registers.
- * @value: adjusted value
- * @range: array of SMC ranges with valid values
- * @size: SMC ranges array size
- *
- * This returns the difference between input and output value or negative
- * in case of invalid input value.
- * If negative returned, then output value = maximal possible from ranges.
- */
-static int adjust_smc_value(int *value, struct smc_range *range, int size)
-{
-	int maximum = (range + size - 1)->max;
-	int remainder;
-
-	do {
-		if (*value < range->min) {
-			remainder = range->min - *value;
-			*value = range->min; /* nearest valid value */
-			return remainder;
-		} else if ((range->min <= *value) && (*value <= range->max))
-			return 0;
-
-		range++;
-	} while (--size);
-	*value = maximum;
-
-	return -1; /* invalid value */
-}
-
-/**
- * calc_smc_vals - calculate SMC register values
- * @dev: ATA device
- * @setup: SMC_SETUP register value
- * @pulse: SMC_PULSE register value
- * @cycle: SMC_CYCLE register value
- *
- * This returns negative in case of invalid values for SMC registers:
- * -ER_SMC_RECALC - recalculation required for SMC values,
- * -ER_SMC_CALC - calculation failed (invalid input values).
- *
- * SMC use special coding scheme, see "Coding and Range of Timing
- * Parameters" table from AT91SAM9 datasheets.
- *
- *	SMC_SETUP = 128*setup[5] + setup[4:0]
- *	SMC_PULSE = 256*pulse[6] + pulse[5:0]
- *	SMC_CYCLE = 256*cycle[8:7] + cycle[6:0]
- */
-static int calc_smc_vals(struct device *dev,
-		int *setup, int *pulse, int *cycle, int *cs_pulse)
-{
-	int ret_val;
-	int err = 0;
-	struct smc_range range_setup[] = {	/* SMC_SETUP valid values */
-		{.min = 0,	.max = 31},	/* first  range */
-		{.min = 128,	.max = 159}	/* second range */
-	};
-	struct smc_range range_pulse[] = {	/* SMC_PULSE valid values */
-		{.min = 0,	.max = 63},	/* first  range */
-		{.min = 256,	.max = 319}	/* second range */
-	};
-	struct smc_range range_cycle[] = {	/* SMC_CYCLE valid values */
-		{.min = 0,	.max = 127},	/* first  range */
-		{.min = 256,	.max = 383},	/* second range */
-		{.min = 512,	.max = 639},	/* third  range */
-		{.min = 768,	.max = 895}	/* fourth range */
-	};
-
-	ret_val = adjust_smc_value(setup, range_setup, ARRAY_SIZE(range_setup));
-	if (ret_val < 0)
-		dev_warn(dev, "maximal SMC Setup value\n");
-	else
-		*cycle += ret_val;
-
-	ret_val = adjust_smc_value(pulse, range_pulse, ARRAY_SIZE(range_pulse));
-	if (ret_val < 0)
-		dev_warn(dev, "maximal SMC Pulse value\n");
-	else
-		*cycle += ret_val;
-
-	ret_val = adjust_smc_value(cycle, range_cycle, ARRAY_SIZE(range_cycle));
-	if (ret_val < 0)
-		dev_warn(dev, "maximal SMC Cycle value\n");
-
-	*cs_pulse = *cycle;
-	if (*cs_pulse > CS_PULSE_MAXIMUM) {
-		dev_err(dev, "unable to calculate valid SMC settings\n");
-		return -ER_SMC_CALC;
-	}
-
-	ret_val = adjust_smc_value(cs_pulse, range_pulse,
-					ARRAY_SIZE(range_pulse));
-	if (ret_val < 0) {
-		dev_warn(dev, "maximal SMC CS Pulse value\n");
-	} else if (ret_val != 0) {
-		*cycle = *cs_pulse;
-		dev_warn(dev, "SMC Cycle extended\n");
-		err = -ER_SMC_RECALC;
-	}
-
-	return err;
-}
-
-/**
- * to_smc_format - convert values into SMC format
- * @setup: SETUP value of SMC Setup Register
- * @pulse: PULSE value of SMC Pulse Register
- * @cycle: CYCLE value of SMC Cycle Register
- * @cs_pulse: NCS_PULSE value of SMC Pulse Register
- */
-static void to_smc_format(int *setup, int *pulse, int *cycle, int *cs_pulse)
-{
-	*setup = (*setup & 0x1f) | ((*setup & 0x80) >> 2);
-	*pulse = (*pulse & 0x3f) | ((*pulse & 0x100) >> 2);
-	*cycle = (*cycle & 0x7f) | ((*cycle & 0x300) >> 1);
-	*cs_pulse = (*cs_pulse & 0x3f) | ((*cs_pulse & 0x100) >> 2);
-}
-
-static unsigned long calc_mck_cycles(unsigned long ns, unsigned long mck_hz)
-{
-	unsigned long mul;
-
-	/*
-	* cycles = x [nsec] * f [Hz] / 10^9 [ns in sec] =
-	*     x * (f / 1_000_000_000) =
-	*     x * ((f * 65536) / 1_000_000_000) / 65536 =
-	*     x * (((f / 10_000) * 65536) / 100_000) / 65536 =
-	*/
-
-	mul = (mck_hz / 10000) << 16;
-	mul /= 100000;
-
-	return (ns * mul + 65536) >> 16;    /* rounding */
-}
-
-/**
- * set_smc_timing - SMC timings setup.
- * @dev: device
- * @info: AT91 IDE info
- * @ata: ATA timings
- *
- * Its assumed that write timings are same as read timings,
- * cs_setup = 0 and cs_pulse = cycle.
- */
-static void set_smc_timing(struct device *dev, struct ata_device *adev,
-		struct at91_ide_info *info, const struct ata_timing *ata)
-{
-	int ret = 0;
-	int use_iordy;
-	unsigned int t6z;         /* data tristate time in ns */
-	unsigned int cycle;       /* SMC Cycle width in MCK ticks */
-	unsigned int setup;       /* SMC Setup width in MCK ticks */
-	unsigned int pulse;       /* CFIOR and CFIOW pulse width in MCK ticks */
-	unsigned int cs_pulse;    /* CS4 or CS5 pulse width in MCK ticks*/
-	unsigned int tdf_cycles;  /* SMC TDF MCK ticks */
-	unsigned long mck_hz;     /* MCK frequency in Hz */
-
-	t6z = (ata->mode < XFER_PIO_5) ? 30 : 20;
-	mck_hz = clk_get_rate(info->mck);
-	cycle = calc_mck_cycles(ata->cyc8b, mck_hz);
-	setup = calc_mck_cycles(ata->setup, mck_hz);
-	pulse = calc_mck_cycles(ata->act8b, mck_hz);
-	tdf_cycles = calc_mck_cycles(t6z, mck_hz);
-
-	do {
-		ret = calc_smc_vals(dev, &setup, &pulse, &cycle, &cs_pulse);
-	} while (ret == -ER_SMC_RECALC);
-
-	if (ret == -ER_SMC_CALC)
-		dev_err(dev, "Interface may not operate correctly\n");
-
-	dev_dbg(dev, "SMC Setup=%u, Pulse=%u, Cycle=%u, CS Pulse=%u\n",
-		setup, pulse, cycle, cs_pulse);
-	to_smc_format(&setup, &pulse, &cycle, &cs_pulse);
-	/* disable or enable waiting for IORDY signal */
-	use_iordy = ata_pio_need_iordy(adev);
-	if (use_iordy)
-		info->mode |= AT91_SMC_EXNWMODE_READY;
-
-	if (tdf_cycles > 15) {
-		tdf_cycles = 15;
-		dev_warn(dev, "maximal SMC TDF Cycles value\n");
-	}
-
-	dev_dbg(dev, "Use IORDY=%u, TDF Cycles=%u\n", use_iordy, tdf_cycles);
-
-	regmap_fields_write(fields.setup, info->cs,
-			    AT91SAM9_SMC_NRDSETUP(setup) |
-			    AT91SAM9_SMC_NWESETUP(setup) |
-			    AT91SAM9_SMC_NCS_NRDSETUP(0) |
-			    AT91SAM9_SMC_NCS_WRSETUP(0));
-	regmap_fields_write(fields.pulse, info->cs,
-			    AT91SAM9_SMC_NRDPULSE(pulse) |
-			    AT91SAM9_SMC_NWEPULSE(pulse) |
-			    AT91SAM9_SMC_NCS_NRDPULSE(cs_pulse) |
-			    AT91SAM9_SMC_NCS_WRPULSE(cs_pulse));
-	regmap_fields_write(fields.cycle, info->cs,
-			    AT91SAM9_SMC_NRDCYCLE(cycle) |
-			    AT91SAM9_SMC_NWECYCLE(cycle));
-	regmap_fields_write(fields.mode, info->cs, info->mode |
-			    AT91_SMC_TDF_(tdf_cycles));
-}
-
-static void pata_at91_set_piomode(struct ata_port *ap, struct ata_device *adev)
-{
-	struct at91_ide_info *info = ap->host->private_data;
-	struct ata_timing timing;
-	int ret;
-
-	/* Compute ATA timing and set it to SMC */
-	ret = ata_timing_compute(adev, adev->pio_mode, &timing, 1000, 0);
-	if (ret) {
-		dev_warn(ap->dev, "Failed to compute ATA timing %d, "
-			 "set PIO_0 timing\n", ret);
-		timing = *ata_timing_find_mode(XFER_PIO_0);
-	}
-	set_smc_timing(ap->dev, adev, info, &timing);
-}
-
-static unsigned int pata_at91_data_xfer_noirq(struct ata_queued_cmd *qc,
-		unsigned char *buf, unsigned int buflen, int rw)
-{
-	struct at91_ide_info *info = qc->dev->link->ap->host->private_data;
-	unsigned int consumed;
-	unsigned int mode;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	regmap_fields_read(fields.mode, info->cs, &mode);
-
-	/* set 16bit mode before writing data */
-	regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) |
-			    AT91_SMC_DBW_16);
-
-	consumed = ata_sff_data_xfer(qc, buf, buflen, rw);
-
-	/* restore 8bit mode after data is written */
-	regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) |
-			    AT91_SMC_DBW_8);
-
-	local_irq_restore(flags);
-	return consumed;
-}
-
-static struct scsi_host_template pata_at91_sht = {
-	ATA_PIO_SHT(DRV_NAME),
-};
-
-static struct ata_port_operations pata_at91_port_ops = {
-	.inherits	= &ata_sff_port_ops,
-
-	.sff_data_xfer	= pata_at91_data_xfer_noirq,
-	.set_piomode	= pata_at91_set_piomode,
-	.cable_detect	= ata_cable_40wire,
-};
-
-static int at91sam9_smc_fields_init(struct device *dev)
-{
-	struct reg_field field = REG_FIELD(0, 0, 31);
-
-	field.id_size = 8;
-	field.id_offset = AT91SAM9_SMC_GENERIC_BLK_SZ;
-
-	field.reg = AT91SAM9_SMC_SETUP(AT91SAM9_SMC_GENERIC);
-	fields.setup = devm_regmap_field_alloc(dev, smc, field);
-	if (IS_ERR(fields.setup))
-		return PTR_ERR(fields.setup);
-
-	field.reg = AT91SAM9_SMC_PULSE(AT91SAM9_SMC_GENERIC);
-	fields.pulse = devm_regmap_field_alloc(dev, smc, field);
-	if (IS_ERR(fields.pulse))
-		return PTR_ERR(fields.pulse);
-
-	field.reg = AT91SAM9_SMC_CYCLE(AT91SAM9_SMC_GENERIC);
-	fields.cycle = devm_regmap_field_alloc(dev, smc, field);
-	if (IS_ERR(fields.cycle))
-		return PTR_ERR(fields.cycle);
-
-	field.reg = AT91SAM9_SMC_MODE(AT91SAM9_SMC_GENERIC);
-	fields.mode = devm_regmap_field_alloc(dev, smc, field);
-
-	return PTR_ERR_OR_ZERO(fields.mode);
-}
-
-static int pata_at91_probe(struct platform_device *pdev)
-{
-	struct at91_cf_data *board = dev_get_platdata(&pdev->dev);
-	struct device *dev = &pdev->dev;
-	struct at91_ide_info *info;
-	struct resource *mem_res;
-	struct ata_host *host;
-	struct ata_port *ap;
-
-	int irq_flags = 0;
-	int irq = 0;
-	int ret;
-
-	/*  get platform resources: IO/CTL memories and irq/rst pins */
-
-	if (pdev->num_resources != 1) {
-		dev_err(&pdev->dev, "invalid number of resources\n");
-		return -EINVAL;
-	}
-
-	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	if (!mem_res) {
-		dev_err(dev, "failed to get mem resource\n");
-		return -EINVAL;
-	}
-
-	irq = board->irq_pin;
-
-	smc = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "atmel,smc");
-	if (IS_ERR(smc))
-		return PTR_ERR(smc);
-
-	ret = at91sam9_smc_fields_init(dev);
-	if (ret < 0)
-		return ret;
-
-	/* init ata host */
-
-	host = ata_host_alloc(dev, 1);
-
-	if (!host)
-		return -ENOMEM;
-
-	ap = host->ports[0];
-	ap->ops = &pata_at91_port_ops;
-	ap->flags |= ATA_FLAG_SLAVE_POSS;
-	ap->pio_mask = ATA_PIO4;
-
-	if (!gpio_is_valid(irq)) {
-		ap->flags |= ATA_FLAG_PIO_POLLING;
-		ata_port_desc(ap, "no IRQ, using PIO polling");
-	}
-
-	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
-
-	if (!info) {
-		dev_err(dev, "failed to allocate memory for private data\n");
-		return -ENOMEM;
-	}
-
-	info->mck = clk_get(NULL, "mck");
-
-	if (IS_ERR(info->mck)) {
-		dev_err(dev, "failed to get access to mck clock\n");
-		return -ENODEV;
-	}
-
-	info->cs    = board->chipselect;
-	info->mode  = AT91_SMC_READMODE | AT91_SMC_WRITEMODE |
-		AT91_SMC_EXNWMODE_READY | AT91_SMC_BAT_SELECT |
-		AT91_SMC_DBW_8 | AT91_SMC_TDF_(0);
-
-	info->ide_addr = devm_ioremap(dev,
-			mem_res->start + CF_IDE_OFFSET, CF_IDE_RES_SIZE);
-
-	if (!info->ide_addr) {
-		dev_err(dev, "failed to map IO base\n");
-		ret = -ENOMEM;
-		goto err_put;
-	}
-
-	info->alt_addr = devm_ioremap(dev,
-			mem_res->start + CF_ALT_IDE_OFFSET, CF_IDE_RES_SIZE);
-
-	if (!info->alt_addr) {
-		dev_err(dev, "failed to map CTL base\n");
-		ret = -ENOMEM;
-		goto err_put;
-	}
-
-	ap->ioaddr.cmd_addr = info->ide_addr;
-	ap->ioaddr.ctl_addr = info->alt_addr + 0x06;
-	ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr;
-
-	ata_sff_std_ports(&ap->ioaddr);
-
-	ata_port_desc(ap, "mmio cmd 0x%llx ctl 0x%llx",
-			(unsigned long long)mem_res->start + CF_IDE_OFFSET,
-			(unsigned long long)mem_res->start + CF_ALT_IDE_OFFSET);
-
-	host->private_data = info;
-
-	ret = ata_host_activate(host, gpio_is_valid(irq) ? gpio_to_irq(irq) : 0,
-				gpio_is_valid(irq) ? ata_sff_interrupt : NULL,
-				irq_flags, &pata_at91_sht);
-	if (ret)
-		goto err_put;
-
-	return 0;
-
-err_put:
-	clk_put(info->mck);
-	return ret;
-}
-
-static int pata_at91_remove(struct platform_device *pdev)
-{
-	struct ata_host *host = platform_get_drvdata(pdev);
-	struct at91_ide_info *info;
-
-	if (!host)
-		return 0;
-	info = host->private_data;
-
-	ata_host_detach(host);
-
-	if (!info)
-		return 0;
-
-	clk_put(info->mck);
-
-	return 0;
-}
-
-static struct platform_driver pata_at91_driver = {
-	.probe		= pata_at91_probe,
-	.remove		= pata_at91_remove,
-	.driver		= {
-		.name		= DRV_NAME,
-	},
-};
-
-module_platform_driver(pata_at91_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Driver for CF in True IDE mode on AT91SAM9260 SoC");
-MODULE_AUTHOR("Matyukevich Sergey");
-MODULE_VERSION(DRV_VERSION);
-
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index e347e7acd8ed..0adcb40d2794 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1328,7 +1328,7 @@ static int pata_macio_pci_resume(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-static struct of_device_id pata_macio_match[] =
+static const struct of_device_id pata_macio_match[] =
 {
 	{
 	.name 		= "IDE",
diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c
index 252ba27fa63b..9730125530f6 100644
--- a/drivers/ata/pata_mpc52xx.c
+++ b/drivers/ata/pata_mpc52xx.c
@@ -847,7 +847,7 @@ mpc52xx_ata_resume(struct platform_device *op)
 }
 #endif
 
-static struct of_device_id mpc52xx_ata_of_match[] = {
+static const struct of_device_id mpc52xx_ata_of_match[] = {
 	{ .compatible = "fsl,mpc5200-ata", },
 	{ .compatible = "mpc5200-ata", },
 	{},
diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c
index 201a32d0627f..01161c1aef4d 100644
--- a/drivers/ata/pata_of_platform.c
+++ b/drivers/ata/pata_of_platform.c
@@ -67,7 +67,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
 				     reg_shift, pio_mask, &pata_platform_sht);
 }
 
-static struct of_device_id pata_of_platform_match[] = {
+static const struct of_device_id pata_of_platform_match[] = {
 	{ .compatible = "ata-generic", },
 	{ },
 };
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index a723ae929783..01734d54c69c 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -1612,7 +1612,7 @@ static int sata_fsl_resume(struct platform_device *op)
 }
 #endif
 
-static struct of_device_id fsl_sata_match[] = {
+static const struct of_device_id fsl_sata_match[] = {
 	{
 		.compatible = "fsl,pq-sata",
 	},
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 00ce26d0c047..b66bcda88320 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -4286,7 +4286,7 @@ static int mv_platform_resume(struct platform_device *pdev)
 #endif
 
 #ifdef CONFIG_OF
-static struct of_device_id mv_sata_dt_ids[] = {
+static const struct of_device_id mv_sata_dt_ids[] = {
 	{ .compatible = "marvell,armada-370-sata", },
 	{ .compatible = "marvell,orion-sata", },
 	{},
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e697dec9d25b..ad196427b4f2 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -121,7 +121,9 @@ static const struct genpd_lock_ops genpd_spin_ops = {
 #define genpd_lock_interruptible(p)	p->lock_ops->lock_interruptible(p)
 #define genpd_unlock(p)			p->lock_ops->unlock(p)
 
+#define genpd_status_on(genpd)		(genpd->status == GPD_STATE_ACTIVE)
 #define genpd_is_irq_safe(genpd)	(genpd->flags & GENPD_FLAG_IRQ_SAFE)
+#define genpd_is_always_on(genpd)	(genpd->flags & GENPD_FLAG_ALWAYS_ON)
 
 static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 		struct generic_pm_domain *genpd)
@@ -130,8 +132,12 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
 
 	ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
 
-	/* Warn once if IRQ safe dev in no sleep domain */
-	if (ret)
+	/*
+	 * Warn once if an IRQ safe device is attached to a no sleep domain, as
+	 * to indicate a suboptimal configuration for PM. For an always on
+	 * domain this isn't case, thus don't warn.
+	 */
+	if (ret && !genpd_is_always_on(genpd))
 		dev_warn_once(dev, "PM domain %s will not be powered off\n",
 				genpd->name);
 
@@ -296,11 +302,15 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 	 * (1) The domain is already in the "power off" state.
 	 * (2) System suspend is in progress.
 	 */
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    || genpd->prepared_count > 0)
+	if (!genpd_status_on(genpd) || genpd->prepared_count > 0)
 		return 0;
 
-	if (atomic_read(&genpd->sd_count) > 0)
+	/*
+	 * Abort power off for the PM domain in the following situations:
+	 * (1) The domain is configured as always on.
+	 * (2) When the domain has a subdomain being powered on.
+	 */
+	if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0)
 		return -EBUSY;
 
 	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
@@ -373,7 +383,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth)
 	struct gpd_link *link;
 	int ret = 0;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return 0;
 
 	/*
@@ -752,7 +762,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd) || genpd_is_always_on(genpd))
 		return;
 
 	if (genpd->suspended_count != genpd->device_count
@@ -761,7 +771,8 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
-	_genpd_power_off(genpd, false);
+	if (_genpd_power_off(genpd, false))
+		return;
 
 	genpd->status = GPD_STATE_POWER_OFF;
 
@@ -793,7 +804,7 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock,
 {
 	struct gpd_link *link;
 
-	if (genpd->status == GPD_STATE_ACTIVE)
+	if (genpd_status_on(genpd))
 		return;
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
@@ -1329,8 +1340,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	genpd_lock(subdomain);
 	genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING);
 
-	if (genpd->status == GPD_STATE_POWER_OFF
-	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
+	if (!genpd_status_on(genpd) && genpd_status_on(subdomain)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1346,7 +1356,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd,
 	list_add_tail(&link->master_node, &genpd->master_links);
 	link->slave = subdomain;
 	list_add_tail(&link->slave_node, &subdomain->slave_links);
-	if (subdomain->status != GPD_STATE_POWER_OFF)
+	if (genpd_status_on(subdomain))
 		genpd_sd_counter_inc(genpd);
 
  out:
@@ -1406,7 +1416,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
-		if (subdomain->status != GPD_STATE_POWER_OFF)
+		if (genpd_status_on(subdomain))
 			genpd_sd_counter_dec(genpd);
 
 		ret = 0;
@@ -1492,6 +1502,10 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 		genpd->dev_ops.start = pm_clk_resume;
 	}
 
+	/* Always-on domains must be powered on at initialization. */
+	if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
+		return -EINVAL;
+
 	/* Use only one "off" state if there were no states declared */
 	if (genpd->state_count == 0) {
 		ret = genpd_set_default_power_state(genpd);
@@ -1700,12 +1714,12 @@ int of_genpd_add_provider_simple(struct device_node *np,
 
 	mutex_lock(&gpd_list_lock);
 
-	if (pm_genpd_present(genpd))
+	if (pm_genpd_present(genpd)) {
 		ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
-
-	if (!ret) {
-		genpd->provider = &np->fwnode;
-		genpd->has_provider = true;
+		if (!ret) {
+			genpd->provider = &np->fwnode;
+			genpd->has_provider = true;
+		}
 	}
 
 	mutex_unlock(&gpd_list_lock);
@@ -2079,11 +2093,6 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
 	int err;
 	u32 residency;
 	u32 entry_latency, exit_latency;
-	const struct of_device_id *match_id;
-
-	match_id = of_match_node(idle_state_match, state_node);
-	if (!match_id)
-		return -EINVAL;
 
 	err = of_property_read_u32(state_node, "entry-latency-us",
 						&entry_latency);
@@ -2132,6 +2141,7 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	int err, ret;
 	int count;
 	struct of_phandle_iterator it;
+	const struct of_device_id *match_id;
 
 	count = of_count_phandle_with_args(dn, "domain-idle-states", NULL);
 	if (count <= 0)
@@ -2144,6 +2154,9 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	/* Loop over the phandles until all the requested entry is found */
 	of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) {
 		np = it.node;
+		match_id = of_match_node(idle_state_match, np);
+		if (!match_id)
+			continue;
 		ret = genpd_parse_state(&st[i++], np);
 		if (ret) {
 			pr_err
@@ -2155,8 +2168,11 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 		}
 	}
 
-	*n = count;
-	*states = st;
+	*n = i;
+	if (!i)
+		kfree(st);
+	else
+		*states = st;
 
 	return 0;
 }
@@ -2221,7 +2237,7 @@ static int pm_genpd_summary_one(struct seq_file *s,
 
 	if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup)))
 		goto exit;
-	if (genpd->status == GPD_STATE_POWER_OFF)
+	if (!genpd_status_on(genpd))
 		snprintf(state, sizeof(state), "%s-%u",
 			 status_lookup[genpd->status], genpd->state_idx);
 	else
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index f744de7a0f9b..19df4918e37e 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -312,22 +312,6 @@ config BLK_DEV_SKD
 
 	Use device /dev/skd$N amd /dev/skd$Np$M.
 
-config BLK_DEV_OSD
-	tristate "OSD object-as-blkdev support"
-	depends on SCSI_OSD_ULD
-	---help---
-	  Saying Y or M here will allow the exporting of a single SCSI
-	  OSD (object-based storage) object as a Linux block device.
-
-	  For example, if you create a 2G object on an OSD device,
-	  you can then use this module to present that 2G object as
-	  a Linux block device.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called osdblk.
-
-	  If unsure, say N.
-
 config BLK_DEV_SX8
 	tristate "Promise SATA SX8 support"
 	depends on PCI
@@ -434,23 +418,6 @@ config ATA_OVER_ETH
 	This driver provides Support for ATA over Ethernet block
 	devices like the Coraid EtherDrive (R) Storage Blade.
 
-config MG_DISK
-	tristate "mGine mflash, gflash support"
-	depends on ARM && GPIOLIB
-	help
-	  mGine mFlash(gFlash) block device driver
-
-config MG_DISK_RES
-	int "Size of reserved area before MBR"
-	depends on MG_DISK
-	default 0
-	help
-	  Define size of reserved area that usually used for boot. Unit is KB.
-	  All of the block device operation will be taken this value as start
-	  offset
-	  Examples:
-			1024 => 1 MB
-
 config SUNVDC
 	tristate "Sun Virtual Disk Client support"
 	depends on SUN_LDOMS
@@ -512,19 +479,7 @@ config VIRTIO_BLK_SCSI
 	  Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on
 	  virtio-blk devices.  This is only supported for the legacy
 	  virtio protocol and not enabled by default by any hypervisor.
-	  Your probably want to virtio-scsi instead.
-
-config BLK_DEV_HD
-	bool "Very old hard disk (MFM/RLL/IDE) driver"
-	depends on HAVE_IDE
-	depends on !ARM || ARCH_RPC || BROKEN
-	help
-	  This is a very old hard disk driver that lacks the enhanced
-	  functionality of the newer ones.
-
-	  It is required for systems with ancient MFM/RLL/ESDI drives.
-
-	  If unsure, say N.
+	  You probably want to use virtio-scsi instead.
 
 config BLK_DEV_RBD
 	tristate "Rados block device (RBD)"
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 1e9661e26f29..ec8c36897b75 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -19,10 +19,8 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960)	+= DAC960.o
 obj-$(CONFIG_XILINX_SYSACE)	+= xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
-obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
 obj-$(CONFIG_BLK_DEV_SKD)	+= skd.o
-obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
@@ -30,7 +28,6 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
 obj-$(CONFIG_VIRTIO_BLK)	+= virtio_blk.o
 
 obj-$(CONFIG_BLK_DEV_SX8)	+= sx8.o
-obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 2104b1b4ccda..fa69ecd52cb5 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -617,12 +617,12 @@ static void fd_error( void )
 	if (!fd_request)
 		return;
 
-	fd_request->errors++;
-	if (fd_request->errors >= MAX_ERRORS) {
+	fd_request->error_count++;
+	if (fd_request->error_count >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
 		fd_end_request_cur(-EIO);
 	}
-	else if (fd_request->errors == RECALIBRATE_ERRORS) {
+	else if (fd_request->error_count == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
 		if (SelectedDrive != -1)
 			SUD.track = -1;
@@ -1386,7 +1386,7 @@ static void setup_req_params( int drive )
 	ReqData = ReqBuffer + 512 * ReqCnt;
 
 	if (UseTrackbuffer)
-		read_track = (ReqCmd == READ && fd_request->errors == 0);
+		read_track = (ReqCmd == READ && fd_request->error_count == 0);
 	else
 		read_track = 0;
 
@@ -1409,8 +1409,10 @@ static struct request *set_next_request(void)
 			fdc_queue = 0;
 		if (q) {
 			rq = blk_fetch_request(q);
-			if (rq)
+			if (rq) {
+				rq->error_count = 0;
 				break;
+			}
 		}
 	} while (fdc_queue != old_pos);
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 3adc32a3153b..4ec84d504780 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -134,28 +134,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 	return page;
 }
 
-static void brd_free_page(struct brd_device *brd, sector_t sector)
-{
-	struct page *page;
-	pgoff_t idx;
-
-	spin_lock(&brd->brd_lock);
-	idx = sector >> PAGE_SECTORS_SHIFT;
-	page = radix_tree_delete(&brd->brd_pages, idx);
-	spin_unlock(&brd->brd_lock);
-	if (page)
-		__free_page(page);
-}
-
-static void brd_zero_page(struct brd_device *brd, sector_t sector)
-{
-	struct page *page;
-
-	page = brd_lookup_page(brd, sector);
-	if (page)
-		clear_highpage(page);
-}
-
 /*
  * Free all backing store pages and radix tree. This must only be called when
  * there are no other users of the device.
@@ -212,24 +190,6 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
 	return 0;
 }
 
-static void discard_from_brd(struct brd_device *brd,
-			sector_t sector, size_t n)
-{
-	while (n >= PAGE_SIZE) {
-		/*
-		 * Don't want to actually discard pages here because
-		 * re-allocating the pages can result in writeback
-		 * deadlocks under heavy load.
-		 */
-		if (0)
-			brd_free_page(brd, sector);
-		else
-			brd_zero_page(brd, sector);
-		sector += PAGE_SIZE >> SECTOR_SHIFT;
-		n -= PAGE_SIZE;
-	}
-}
-
 /*
  * Copy n bytes from src to the brd starting at sector. Does not sleep.
  */
@@ -338,14 +298,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
 		goto io_error;
 
-	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
-		if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
-		    bio->bi_iter.bi_size & ~PAGE_MASK)
-			goto io_error;
-		discard_from_brd(brd, sector, bio->bi_iter.bi_size);
-		goto out;
-	}
-
 	bio_for_each_segment(bvec, bio, iter) {
 		unsigned int len = bvec.bv_len;
 		int err;
@@ -357,7 +309,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		sector += len >> SECTOR_SHIFT;
 	}
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 io_error:
@@ -464,11 +415,6 @@ static struct brd_device *brd_alloc(int i)
 	 *  is harmless)
 	 */
 	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-
-	brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
-	blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
-	brd->brd_queue->limits.discard_zeroes_data = 1;
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 	queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
 #endif
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8e1a4554951c..cd375503f7b0 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq)
 	/* set the residual count for pc requests */
 	if (blk_rq_is_passthrough(rq))
 		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-
-	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
+	blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, c);
@@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 {
 	int retry_cmd = 0;
 	struct request *rq = cmd->rq;
+	struct scsi_request *sreq = scsi_req(rq);
 
-	rq->errors = 0;
+	sreq->result = 0;
 
 	if (timeout)
-		rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
+		sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
 
 	if (cmd->err_info->CommandStatus == 0)	/* no error has occurred */
 		goto after_error_processing;
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
+		sreq->result = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (!blk_rq_is_passthrough(cmd->rq)) {
@@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_INVALID:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p is "
 		       "reported invalid\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_PROTOCOL_ERR:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p has "
 		       "protocol error\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_HARDWARE_ERR:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 		       " hardware error\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_CONNECTION_LOST:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 		       "connection lost\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_ABORTED:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p was "
 		       "aborted\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ABORT);
@@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 	case CMD_ABORT_FAILED:
 		dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
 		       "abort failed\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 		} else
 			dev_warn(&h->pdev->dev,
 				"%p retried too many times\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ABORT);
 		break;
 	case CMD_TIMEOUT:
 		dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_UNABORTABLE:
 		dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 		dev_warn(&h->pdev->dev, "cmd %p returned "
 		       "unknown status %x\n", cmd,
 		       cmd->err_info->CommandStatus);
-		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+		sreq->result = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
 			blk_rq_is_passthrough(cmd->rq) ?
 				DID_PASSTHROUGH : DID_ERROR);
@@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q)
 		if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
 			dev_warn(&h->pdev->dev,
 				"%s: error mapping page for DMA\n", __func__);
-			creq->errors = make_status_bytes(SAM_STAT_GOOD,
-							0, DRIVER_OK,
-							DID_SOFT_ERROR);
+			scsi_req(creq)->result =
+				make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
+						  DID_SOFT_ERROR);
 			cmd_free(h, c);
 			return;
 		}
@@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q)
 		if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
 			(seg - (h->max_cmd_sgentries - 1)) *
 				sizeof(SGDescriptor_struct))) {
-			creq->errors = make_status_bytes(SAM_STAT_GOOD,
-							0, DRIVER_OK,
-							DID_SOFT_ERROR);
+			scsi_req(creq)->result =
+				make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK,
+						  DID_SOFT_ERROR);
 			cmd_free(h, c);
 			return;
 		}
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index de5c3ee8a790..494837e59f23 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
 	seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
 	seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
 	seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
-
-	if (f & EE_IS_TRIM)
-		__seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
 	seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
 	seq_putc(m, '\n');
 }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 724d1c50fc52..d5da45bb03a6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -437,9 +437,6 @@ enum {
 
 	/* is this a TRIM aka REQ_DISCARD? */
 	__EE_IS_TRIM,
-	/* our lower level cannot handle trim,
-	 * and we want to fall back to zeroout instead */
-	__EE_IS_TRIM_USE_ZEROOUT,
 
 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
@@ -482,7 +479,6 @@ enum {
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 #define EE_IS_TRIM             (1<<__EE_IS_TRIM)
-#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
 #define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
@@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data);
 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
 /* drbd_receiver.c */
-extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
-		sector_t start, unsigned int nr_sectors, bool discard);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_ack_receiver(struct drbd_thread *thi);
 extern void drbd_send_ping_wf(struct work_struct *ws);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 92c60cbd04ee..84455c365f57 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -931,7 +931,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
 		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
 		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
 		p->qlim->discard_enabled = blk_queue_discard(q);
-		p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
 		p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
 	} else {
 		q = device->rq_queue;
@@ -941,7 +940,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r
 		p->qlim->io_min = cpu_to_be32(queue_io_min(q));
 		p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
 		p->qlim->discard_enabled = 0;
-		p->qlim->discard_zeroes_data = 0;
 		p->qlim->write_same_capable = 0;
 	}
 }
@@ -1668,7 +1666,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
 			(bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
 			(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
 			(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
-			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
+			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
+			(bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
 	else
 		return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
 }
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 908c704e20aa..02255a0d68b9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1199,10 +1199,6 @@ static void decide_on_discard_support(struct drbd_device *device,
 	struct drbd_connection *connection = first_peer_device(device)->connection;
 	bool can_do = b ? blk_queue_discard(b) : true;
 
-	if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) {
-		can_do = false;
-		drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
-	}
 	if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
 		can_do = false;
 		drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
@@ -1217,10 +1213,12 @@ static void decide_on_discard_support(struct drbd_device *device,
 		blk_queue_discard_granularity(q, 512);
 		q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+		q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
 	} else {
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 		blk_queue_discard_granularity(q, 0);
 		q->limits.max_discard_sectors = 0;
+		q->limits.max_write_zeroes_sectors = 0;
 	}
 }
 
@@ -1482,8 +1480,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
 	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
 		disk_conf->al_extents = drbd_al_extents_max(nbc);
 
-	if (!blk_queue_discard(q)
-	    || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) {
+	if (!blk_queue_discard(q)) {
 		if (disk_conf->rs_discard_granularity) {
 			disk_conf->rs_discard_granularity = 0; /* disable feature */
 			drbd_info(device, "rs_discard_granularity feature disabled\n");
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index aa6bf9692eff..1b0a2be24f39 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1448,105 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }
 
-/*
- * We *may* ignore the discard-zeroes-data setting, if so configured.
- *
- * Assumption is that it "discard_zeroes_data=0" is only because the backend
- * may ignore partial unaligned discards.
- *
- * LVM/DM thin as of at least
- *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
- *   Library version: 1.02.93-RHEL7 (2015-01-28)
- *   Driver version:  4.29.0
- * still behaves this way.
- *
- * For unaligned (wrt. alignment and granularity) or too small discards,
- * we zero-out the initial (and/or) trailing unaligned partial chunks,
- * but discard all the aligned full chunks.
- *
- * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
- */
-int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
-{
-	struct block_device *bdev = device->ldev->backing_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t tmp, nr;
-	unsigned int max_discard_sectors, granularity;
-	int alignment;
-	int err = 0;
-
-	if (!discard)
-		goto zero_out;
-
-	/* Zero-sector (unknown) and one-sector granularities are the same.  */
-	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
-	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
-	max_discard_sectors -= max_discard_sectors % granularity;
-	if (unlikely(!max_discard_sectors))
-		goto zero_out;
-
-	if (nr_sectors < granularity)
-		goto zero_out;
-
-	tmp = start;
-	if (sector_div(tmp, granularity) != alignment) {
-		if (nr_sectors < 2*granularity)
-			goto zero_out;
-		/* start + gran - (start + gran - align) % gran */
-		tmp = start + granularity - alignment;
-		tmp = start + granularity - sector_div(tmp, granularity);
-
-		nr = tmp - start;
-		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
-		nr_sectors -= nr;
-		start = tmp;
-	}
-	while (nr_sectors >= granularity) {
-		nr = min_t(sector_t, nr_sectors, max_discard_sectors);
-		err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
-		nr_sectors -= nr;
-		start += nr;
-	}
- zero_out:
-	if (nr_sectors) {
-		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
-	}
-	return err != 0;
-}
-
-static bool can_do_reliable_discards(struct drbd_device *device)
-{
-	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
-	struct disk_conf *dc;
-	bool can_do;
-
-	if (!blk_queue_discard(q))
-		return false;
-
-	if (q->limits.discard_zeroes_data)
-		return true;
-
-	rcu_read_lock();
-	dc = rcu_dereference(device->ldev->disk_conf);
-	can_do = dc->discard_zeroes_if_aligned;
-	rcu_read_unlock();
-	return can_do;
-}
-
 static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
 {
-	/* If the backend cannot discard, or does not guarantee
-	 * read-back zeroes in discarded ranges, we fall back to
-	 * zero-out.  Unless configuration specifically requested
-	 * otherwise. */
-	if (!can_do_reliable_discards(device))
-		peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+	struct block_device *bdev = device->ldev->backing_bdev;
 
-	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
-	    peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
+	if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
+			GFP_NOIO, 0))
 		peer_req->flags |= EE_WAS_ERROR;
+
 	drbd_endio_write_sec_final(peer_req);
 }
 
@@ -2376,7 +2285,7 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf)
 static unsigned long wire_flags_to_bio_op(u32 dpf)
 {
 	if (dpf & DP_DISCARD)
-		return REQ_OP_DISCARD;
+		return REQ_OP_WRITE_ZEROES;
 	else
 		return REQ_OP_WRITE;
 }
@@ -2567,7 +2476,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	op_flags = wire_flags_to_bio_flags(dp_flags);
 	if (pi->cmd == P_TRIM) {
 		D_ASSERT(peer_device, peer_req->i.size > 0);
-		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
 		D_ASSERT(peer_device, peer_req->pages == NULL);
 	} else if (peer_req->pages == NULL) {
 		D_ASSERT(device, peer_req->i.size == 0);
@@ -4880,7 +4789,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
 
 	if (get_ldev(device)) {
 		struct drbd_peer_request *peer_req;
-		const int op = REQ_OP_DISCARD;
+		const int op = REQ_OP_WRITE_ZEROES;
 
 		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
 					       size, 0, GFP_NOIO);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 652114ae1a8a..b5730e17b455 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -59,6 +59,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 	drbd_req_make_private_bio(req, bio_src);
 	req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
 		      | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
+		      | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0)
 		      | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
 	req->device = device;
 	req->master_bio = bio_src;
@@ -1148,10 +1149,10 @@ static int drbd_process_write_request(struct drbd_request *req)
 
 static void drbd_process_discard_req(struct drbd_request *req)
 {
-	int err = drbd_issue_discard_or_zero_out(req->device,
-				req->i.sector, req->i.size >> 9, true);
+	struct block_device *bdev = req->device->ldev->backing_bdev;
 
-	if (err)
+	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
+			GFP_NOIO, 0))
 		req->private_bio->bi_error = -EIO;
 	bio_endio(req->private_bio);
 }
@@ -1180,7 +1181,8 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 	if (get_ldev(device)) {
 		if (drbd_insert_fault(device, type))
 			bio_io_error(bio);
-		else if (bio_op(bio) == REQ_OP_DISCARD)
+		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+			 bio_op(bio) == REQ_OP_DISCARD)
 			drbd_process_discard_req(req);
 		else
 			generic_make_request(bio);
@@ -1234,7 +1236,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 	_drbd_start_io_acct(device, req);
 
 	/* process discards always from our submitter thread */
-	if (bio_op(bio) & REQ_OP_DISCARD)
+	if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) ||
+	    (bio_op(bio) & REQ_OP_DISCARD))
 		goto queue_for_submitter_thread;
 
 	if (rw == WRITE && req->private_bio && req->i.size
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 3bff33f21435..1afcb4e02d8d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -174,7 +174,8 @@ void drbd_peer_request_endio(struct bio *bio)
 	struct drbd_peer_request *peer_req = bio->bi_private;
 	struct drbd_device *device = peer_req->peer_device->device;
 	bool is_write = bio_data_dir(bio) == WRITE;
-	bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
+	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+			  bio_op(bio) == REQ_OP_DISCARD;
 
 	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
@@ -249,6 +250,7 @@ void drbd_request_endio(struct bio *bio)
 	/* to avoid recursion in __req_mod */
 	if (unlikely(bio->bi_error)) {
 		switch (bio_op(bio)) {
+		case REQ_OP_WRITE_ZEROES:
 		case REQ_OP_DISCARD:
 			if (bio->bi_error == -EOPNOTSUPP)
 				what = DISCARD_COMPLETED_NOTSUPP;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 45b4384f650c..60d4c7653178 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2805,8 +2805,10 @@ static int set_next_request(void)
 			fdc_queue = 0;
 		if (q) {
 			current_req = blk_fetch_request(q);
-			if (current_req)
+			if (current_req) {
+				current_req->error_count = 0;
 				break;
+			}
 		}
 	} while (fdc_queue != old_pos);
 
@@ -2866,7 +2868,7 @@ do_request:
 		_floppy = floppy_type + DP->autodetect[DRS->probed_format];
 	} else
 		probing = 0;
-	errors = &(current_req->errors);
+	errors = &(current_req->error_count);
 	tmp = make_raw_rw_request();
 	if (tmp < 2) {
 		request_done(tmp);
@@ -4207,9 +4209,7 @@ static int __init do_floppy_init(void)
 		disks[drive]->fops = &floppy_fops;
 		sprintf(disks[drive]->disk_name, "fd%d", drive);
 
-		init_timer(&motor_off_timer[drive]);
-		motor_off_timer[drive].data = drive;
-		motor_off_timer[drive].function = motor_off_callback;
+		setup_timer(&motor_off_timer[drive], motor_off_callback, drive);
 	}
 
 	err = register_blkdev(FLOPPY_MAJOR, "fd");
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
deleted file mode 100644
index 6043648da1e8..000000000000
--- a/drivers/block/hd.c
+++ /dev/null
@@ -1,803 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This is the low-level hd interrupt support. It traverses the
- * request-list, using interrupts to jump between functions. As
- * all the functions are called within interrupts, we may not
- * sleep. Special care is recommended.
- *
- *  modified by Drew Eckhardt to check nr of hd's from the CMOS.
- *
- *  Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
- *  in the early extended-partition checks and added DM partitions
- *
- *  IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
- *  and general streamlining by Mark Lord.
- *
- *  Removed 99% of above. Use Mark's ide driver for those options.
- *  This is now a lightweight ST-506 driver. (Paul Gortmaker)
- *
- *  Modified 1995 Russell King for ARM processor.
- *
- *  Bugfix: max_sectors must be <= 255 or the wheels tend to come
- *  off in a hurry once you queue things up - Paul G. 02/2001
- */
-
-/* Uncomment the following if you want verbose error reports. */
-/* #define VERBOSE_ERRORS */
-
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/interrupt.h>
-#include <linux/timer.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/genhd.h>
-#include <linux/string.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/blkpg.h>
-#include <linux/ata.h>
-#include <linux/hdreg.h>
-
-#define HD_IRQ 14
-
-#define REALLY_SLOW_IO
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#ifdef __arm__
-#undef  HD_IRQ
-#endif
-#include <asm/irq.h>
-#ifdef __arm__
-#define HD_IRQ IRQ_HARDDISK
-#endif
-
-/* Hd controller regster ports */
-
-#define HD_DATA		0x1f0		/* _CTL when writing */
-#define HD_ERROR	0x1f1		/* see err-bits */
-#define HD_NSECTOR	0x1f2		/* nr of sectors to read/write */
-#define HD_SECTOR	0x1f3		/* starting sector */
-#define HD_LCYL		0x1f4		/* starting cylinder */
-#define HD_HCYL		0x1f5		/* high byte of starting cyl */
-#define HD_CURRENT	0x1f6		/* 101dhhhh , d=drive, hhhh=head */
-#define HD_STATUS	0x1f7		/* see status-bits */
-#define HD_FEATURE	HD_ERROR	/* same io address, read=error, write=feature */
-#define HD_PRECOMP	HD_FEATURE	/* obsolete use of this port - predates IDE */
-#define HD_COMMAND	HD_STATUS	/* same io address, read=status, write=cmd */
-
-#define HD_CMD		0x3f6		/* used for resets */
-#define HD_ALTSTATUS	0x3f6		/* same as HD_STATUS but doesn't clear irq */
-
-/* Bits of HD_STATUS */
-#define ERR_STAT		0x01
-#define INDEX_STAT		0x02
-#define ECC_STAT		0x04	/* Corrected error */
-#define DRQ_STAT		0x08
-#define SEEK_STAT		0x10
-#define SERVICE_STAT		SEEK_STAT
-#define WRERR_STAT		0x20
-#define READY_STAT		0x40
-#define BUSY_STAT		0x80
-
-/* Bits for HD_ERROR */
-#define MARK_ERR		0x01	/* Bad address mark */
-#define TRK0_ERR		0x02	/* couldn't find track 0 */
-#define ABRT_ERR		0x04	/* Command aborted */
-#define MCR_ERR			0x08	/* media change request */
-#define ID_ERR			0x10	/* ID field not found */
-#define MC_ERR			0x20	/* media changed */
-#define ECC_ERR			0x40	/* Uncorrectable ECC error */
-#define BBD_ERR			0x80	/* pre-EIDE meaning:  block marked bad */
-#define ICRC_ERR		0x80	/* new meaning:  CRC error during transfer */
-
-static DEFINE_SPINLOCK(hd_lock);
-static struct request_queue *hd_queue;
-static struct request *hd_req;
-
-#define TIMEOUT_VALUE	(6*HZ)
-#define	HD_DELAY	0
-
-#define MAX_ERRORS     16	/* Max read/write errors/sector */
-#define RESET_FREQ      8	/* Reset controller every 8th retry */
-#define RECAL_FREQ      4	/* Recalibrate every 4th retry */
-#define MAX_HD		2
-
-#define STAT_OK		(READY_STAT|SEEK_STAT)
-#define OK_STATUS(s)	(((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK)
-
-static void recal_intr(void);
-static void bad_rw_intr(void);
-
-static int reset;
-static int hd_error;
-
-/*
- *  This struct defines the HD's and their types.
- */
-struct hd_i_struct {
-	unsigned int head, sect, cyl, wpcom, lzone, ctl;
-	int unit;
-	int recalibrate;
-	int special_op;
-};
-
-#ifdef HD_TYPE
-static struct hd_i_struct hd_info[] = { HD_TYPE };
-static int NR_HD = ARRAY_SIZE(hd_info);
-#else
-static struct hd_i_struct hd_info[MAX_HD];
-static int NR_HD;
-#endif
-
-static struct gendisk *hd_gendisk[MAX_HD];
-
-static struct timer_list device_timer;
-
-#define TIMEOUT_VALUE (6*HZ)
-
-#define SET_TIMER							\
-	do {								\
-		mod_timer(&device_timer, jiffies + TIMEOUT_VALUE);	\
-	} while (0)
-
-static void (*do_hd)(void) = NULL;
-#define SET_HANDLER(x) \
-if ((do_hd = (x)) != NULL) \
-	SET_TIMER; \
-else \
-	del_timer(&device_timer);
-
-
-#if (HD_DELAY > 0)
-
-#include <linux/i8253.h>
-
-unsigned long last_req;
-
-unsigned long read_timer(void)
-{
-	unsigned long t, flags;
-	int i;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	t = jiffies * 11932;
-	outb_p(0, 0x43);
-	i = inb_p(0x40);
-	i |= inb(0x40) << 8;
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-	return(t - i);
-}
-#endif
-
-static void __init hd_setup(char *str, int *ints)
-{
-	int hdind = 0;
-
-	if (ints[0] != 3)
-		return;
-	if (hd_info[0].head != 0)
-		hdind = 1;
-	hd_info[hdind].head = ints[2];
-	hd_info[hdind].sect = ints[3];
-	hd_info[hdind].cyl = ints[1];
-	hd_info[hdind].wpcom = 0;
-	hd_info[hdind].lzone = ints[1];
-	hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0);
-	NR_HD = hdind+1;
-}
-
-static bool hd_end_request(int err, unsigned int bytes)
-{
-	if (__blk_end_request(hd_req, err, bytes))
-		return true;
-	hd_req = NULL;
-	return false;
-}
-
-static bool hd_end_request_cur(int err)
-{
-	return hd_end_request(err, blk_rq_cur_bytes(hd_req));
-}
-
-static void dump_status(const char *msg, unsigned int stat)
-{
-	char *name = "hd?";
-	if (hd_req)
-		name = hd_req->rq_disk->disk_name;
-
-#ifdef VERBOSE_ERRORS
-	printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-	if (stat & BUSY_STAT)	printk("Busy ");
-	if (stat & READY_STAT)	printk("DriveReady ");
-	if (stat & WRERR_STAT)	printk("WriteFault ");
-	if (stat & SEEK_STAT)	printk("SeekComplete ");
-	if (stat & DRQ_STAT)	printk("DataRequest ");
-	if (stat & ECC_STAT)	printk("CorrectedError ");
-	if (stat & INDEX_STAT)	printk("Index ");
-	if (stat & ERR_STAT)	printk("Error ");
-	printk("}\n");
-	if ((stat & ERR_STAT) == 0) {
-		hd_error = 0;
-	} else {
-		hd_error = inb(HD_ERROR);
-		printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff);
-		if (hd_error & BBD_ERR)		printk("BadSector ");
-		if (hd_error & ECC_ERR)		printk("UncorrectableError ");
-		if (hd_error & ID_ERR)		printk("SectorIdNotFound ");
-		if (hd_error & ABRT_ERR)	printk("DriveStatusError ");
-		if (hd_error & TRK0_ERR)	printk("TrackZeroNotFound ");
-		if (hd_error & MARK_ERR)	printk("AddrMarkNotFound ");
-		printk("}");
-		if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
-			printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
-				inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
-			if (hd_req)
-				printk(", sector=%ld", blk_rq_pos(hd_req));
-		}
-		printk("\n");
-	}
-#else
-	printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff);
-	if ((stat & ERR_STAT) == 0) {
-		hd_error = 0;
-	} else {
-		hd_error = inb(HD_ERROR);
-		printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff);
-	}
-#endif
-}
-
-static void check_status(void)
-{
-	int i = inb_p(HD_STATUS);
-
-	if (!OK_STATUS(i)) {
-		dump_status("check_status", i);
-		bad_rw_intr();
-	}
-}
-
-static int controller_busy(void)
-{
-	int retries = 100000;
-	unsigned char status;
-
-	do {
-		status = inb_p(HD_STATUS);
-	} while ((status & BUSY_STAT) && --retries);
-	return status;
-}
-
-static int status_ok(void)
-{
-	unsigned char status = inb_p(HD_STATUS);
-
-	if (status & BUSY_STAT)
-		return 1;	/* Ancient, but does it make sense??? */
-	if (status & WRERR_STAT)
-		return 0;
-	if (!(status & READY_STAT))
-		return 0;
-	if (!(status & SEEK_STAT))
-		return 0;
-	return 1;
-}
-
-static int controller_ready(unsigned int drive, unsigned int head)
-{
-	int retry = 100;
-
-	do {
-		if (controller_busy() & BUSY_STAT)
-			return 0;
-		outb_p(0xA0 | (drive<<4) | head, HD_CURRENT);
-		if (status_ok())
-			return 1;
-	} while (--retry);
-	return 0;
-}
-
-static void hd_out(struct hd_i_struct *disk,
-		   unsigned int nsect,
-		   unsigned int sect,
-		   unsigned int head,
-		   unsigned int cyl,
-		   unsigned int cmd,
-		   void (*intr_addr)(void))
-{
-	unsigned short port;
-
-#if (HD_DELAY > 0)
-	while (read_timer() - last_req < HD_DELAY)
-		/* nothing */;
-#endif
-	if (reset)
-		return;
-	if (!controller_ready(disk->unit, head)) {
-		reset = 1;
-		return;
-	}
-	SET_HANDLER(intr_addr);
-	outb_p(disk->ctl, HD_CMD);
-	port = HD_DATA;
-	outb_p(disk->wpcom >> 2, ++port);
-	outb_p(nsect, ++port);
-	outb_p(sect, ++port);
-	outb_p(cyl, ++port);
-	outb_p(cyl >> 8, ++port);
-	outb_p(0xA0 | (disk->unit << 4) | head, ++port);
-	outb_p(cmd, ++port);
-}
-
-static void hd_request (void);
-
-static int drive_busy(void)
-{
-	unsigned int i;
-	unsigned char c;
-
-	for (i = 0; i < 500000 ; i++) {
-		c = inb_p(HD_STATUS);
-		if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK)
-			return 0;
-	}
-	dump_status("reset timed out", c);
-	return 1;
-}
-
-static void reset_controller(void)
-{
-	int	i;
-
-	outb_p(4, HD_CMD);
-	for (i = 0; i < 1000; i++) barrier();
-	outb_p(hd_info[0].ctl & 0x0f, HD_CMD);
-	for (i = 0; i < 1000; i++) barrier();
-	if (drive_busy())
-		printk("hd: controller still busy\n");
-	else if ((hd_error = inb(HD_ERROR)) != 1)
-		printk("hd: controller reset failed: %02x\n", hd_error);
-}
-
-static void reset_hd(void)
-{
-	static int i;
-
-repeat:
-	if (reset) {
-		reset = 0;
-		i = -1;
-		reset_controller();
-	} else {
-		check_status();
-		if (reset)
-			goto repeat;
-	}
-	if (++i < NR_HD) {
-		struct hd_i_struct *disk = &hd_info[i];
-		disk->special_op = disk->recalibrate = 1;
-		hd_out(disk, disk->sect, disk->sect, disk->head-1,
-			disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd);
-		if (reset)
-			goto repeat;
-	} else
-		hd_request();
-}
-
-/*
- * Ok, don't know what to do with the unexpected interrupts: on some machines
- * doing a reset and a retry seems to result in an eternal loop. Right now I
- * ignore it, and just set the timeout.
- *
- * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
- * drive enters "idle", "standby", or "sleep" mode, so if the status looks
- * "good", we just ignore the interrupt completely.
- */
-static void unexpected_hd_interrupt(void)
-{
-	unsigned int stat = inb_p(HD_STATUS);
-
-	if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) {
-		dump_status("unexpected interrupt", stat);
-		SET_TIMER;
-	}
-}
-
-/*
- * bad_rw_intr() now tries to be a bit smarter and does things
- * according to the error returned by the controller.
- * -Mika Liljeberg (liljeber@cs.Helsinki.FI)
- */
-static void bad_rw_intr(void)
-{
-	struct request *req = hd_req;
-
-	if (req != NULL) {
-		struct hd_i_struct *disk = req->rq_disk->private_data;
-		if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-			hd_end_request_cur(-EIO);
-			disk->special_op = disk->recalibrate = 1;
-		} else if (req->errors % RESET_FREQ == 0)
-			reset = 1;
-		else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0)
-			disk->special_op = disk->recalibrate = 1;
-		/* Otherwise just retry */
-	}
-}
-
-static inline int wait_DRQ(void)
-{
-	int retries;
-	int stat;
-
-	for (retries = 0; retries < 100000; retries++) {
-		stat = inb_p(HD_STATUS);
-		if (stat & DRQ_STAT)
-			return 0;
-	}
-	dump_status("wait_DRQ", stat);
-	return -1;
-}
-
-static void read_intr(void)
-{
-	struct request *req;
-	int i, retries = 100000;
-
-	do {
-		i = (unsigned) inb_p(HD_STATUS);
-		if (i & BUSY_STAT)
-			continue;
-		if (!OK_STATUS(i))
-			break;
-		if (i & DRQ_STAT)
-			goto ok_to_read;
-	} while (--retries > 0);
-	dump_status("read_intr", i);
-	bad_rw_intr();
-	hd_request();
-	return;
-
-ok_to_read:
-	req = hd_req;
-	insw(HD_DATA, bio_data(req->bio), 256);
-#ifdef DEBUG
-	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
-	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
-	       blk_rq_sectors(req) - 1, bio_data(req->bio)+512);
-#endif
-	if (hd_end_request(0, 512)) {
-		SET_HANDLER(&read_intr);
-		return;
-	}
-
-	(void) inb_p(HD_STATUS);
-#if (HD_DELAY > 0)
-	last_req = read_timer();
-#endif
-	hd_request();
-}
-
-static void write_intr(void)
-{
-	struct request *req = hd_req;
-	int i;
-	int retries = 100000;
-
-	do {
-		i = (unsigned) inb_p(HD_STATUS);
-		if (i & BUSY_STAT)
-			continue;
-		if (!OK_STATUS(i))
-			break;
-		if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
-			goto ok_to_write;
-	} while (--retries > 0);
-	dump_status("write_intr", i);
-	bad_rw_intr();
-	hd_request();
-	return;
-
-ok_to_write:
-	if (hd_end_request(0, 512)) {
-		SET_HANDLER(&write_intr);
-		outsw(HD_DATA, bio_data(req->bio), 256);
-		return;
-	}
-
-#if (HD_DELAY > 0)
-	last_req = read_timer();
-#endif
-	hd_request();
-}
-
-static void recal_intr(void)
-{
-	check_status();
-#if (HD_DELAY > 0)
-	last_req = read_timer();
-#endif
-	hd_request();
-}
-
-/*
- * This is another of the error-routines I don't know what to do with. The
- * best idea seems to just set reset, and start all over again.
- */
-static void hd_times_out(unsigned long dummy)
-{
-	char *name;
-
-	do_hd = NULL;
-
-	if (!hd_req)
-		return;
-
-	spin_lock_irq(hd_queue->queue_lock);
-	reset = 1;
-	name = hd_req->rq_disk->disk_name;
-	printk("%s: timeout\n", name);
-	if (++hd_req->errors >= MAX_ERRORS) {
-#ifdef DEBUG
-		printk("%s: too many errors\n", name);
-#endif
-		hd_end_request_cur(-EIO);
-	}
-	hd_request();
-	spin_unlock_irq(hd_queue->queue_lock);
-}
-
-static int do_special_op(struct hd_i_struct *disk, struct request *req)
-{
-	if (disk->recalibrate) {
-		disk->recalibrate = 0;
-		hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr);
-		return reset;
-	}
-	if (disk->head > 16) {
-		printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-		hd_end_request_cur(-EIO);
-	}
-	disk->special_op = 0;
-	return 1;
-}
-
-/*
- * The driver enables interrupts as much as possible.  In order to do this,
- * (a) the device-interrupt is disabled before entering hd_request(),
- * and (b) the timeout-interrupt is disabled before the sti().
- *
- * Interrupts are still masked (by default) whenever we are exchanging
- * data/cmds with a drive, because some drives seem to have very poor
- * tolerance for latency during I/O. The IDE driver has support to unmask
- * interrupts for non-broken hardware, so use that driver if required.
- */
-static void hd_request(void)
-{
-	unsigned int block, nsect, sec, track, head, cyl;
-	struct hd_i_struct *disk;
-	struct request *req;
-
-	if (do_hd)
-		return;
-repeat:
-	del_timer(&device_timer);
-
-	if (!hd_req) {
-		hd_req = blk_fetch_request(hd_queue);
-		if (!hd_req) {
-			do_hd = NULL;
-			return;
-		}
-	}
-	req = hd_req;
-
-	if (reset) {
-		reset_hd();
-		return;
-	}
-	disk = req->rq_disk->private_data;
-	block = blk_rq_pos(req);
-	nsect = blk_rq_sectors(req);
-	if (block >= get_capacity(req->rq_disk) ||
-	    ((block+nsect) > get_capacity(req->rq_disk))) {
-		printk("%s: bad access: block=%d, count=%d\n",
-			req->rq_disk->disk_name, block, nsect);
-		hd_end_request_cur(-EIO);
-		goto repeat;
-	}
-
-	if (disk->special_op) {
-		if (do_special_op(disk, req))
-			goto repeat;
-		return;
-	}
-	sec   = block % disk->sect + 1;
-	track = block / disk->sect;
-	head  = track % disk->head;
-	cyl   = track / disk->head;
-#ifdef DEBUG
-	printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
-		req->rq_disk->disk_name,
-		req_data_dir(req) == READ ? "read" : "writ",
-		cyl, head, sec, nsect, bio_data(req->bio));
-#endif
-
-	switch (req_op(req)) {
-	case REQ_OP_READ:
-		hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
-			&read_intr);
-		if (reset)
-			goto repeat;
-		break;
-	case REQ_OP_WRITE:
-		hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE,
-			&write_intr);
-		if (reset)
-			goto repeat;
-		if (wait_DRQ()) {
-			bad_rw_intr();
-			goto repeat;
-		}
-		outsw(HD_DATA, bio_data(req->bio), 256);
-		break;
-	default:
-		printk("unknown hd-command\n");
-		hd_end_request_cur(-EIO);
-		break;
-	}
-}
-
-static void do_hd_request(struct request_queue *q)
-{
-	hd_request();
-}
-
-static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct hd_i_struct *disk = bdev->bd_disk->private_data;
-
-	geo->heads = disk->head;
-	geo->sectors = disk->sect;
-	geo->cylinders = disk->cyl;
-	return 0;
-}
-
-/*
- * Releasing a block device means we sync() it, so that it can safely
- * be forgotten about...
- */
-
-static irqreturn_t hd_interrupt(int irq, void *dev_id)
-{
-	void (*handler)(void) = do_hd;
-
-	spin_lock(hd_queue->queue_lock);
-
-	do_hd = NULL;
-	del_timer(&device_timer);
-	if (!handler)
-		handler = unexpected_hd_interrupt;
-	handler();
-
-	spin_unlock(hd_queue->queue_lock);
-
-	return IRQ_HANDLED;
-}
-
-static const struct block_device_operations hd_fops = {
-	.getgeo =	hd_getgeo,
-};
-
-static int __init hd_init(void)
-{
-	int drive;
-
-	if (register_blkdev(HD_MAJOR, "hd"))
-		return -1;
-
-	hd_queue = blk_init_queue(do_hd_request, &hd_lock);
-	if (!hd_queue) {
-		unregister_blkdev(HD_MAJOR, "hd");
-		return -ENOMEM;
-	}
-
-	blk_queue_max_hw_sectors(hd_queue, 255);
-	init_timer(&device_timer);
-	device_timer.function = hd_times_out;
-	blk_queue_logical_block_size(hd_queue, 512);
-
-	if (!NR_HD) {
-		/*
-		 * We don't know anything about the drive.  This means
-		 * that you *MUST* specify the drive parameters to the
-		 * kernel yourself.
-		 *
-		 * If we were on an i386, we used to read this info from
-		 * the BIOS or CMOS.  This doesn't work all that well,
-		 * since this assumes that this is a primary or secondary
-		 * drive, and if we're using this legacy driver, it's
-		 * probably an auxiliary controller added to recover
-		 * legacy data off an ST-506 drive.  Either way, it's
-		 * definitely safest to have the user explicitly specify
-		 * the information.
-		 */
-		printk("hd: no drives specified - use hd=cyl,head,sectors"
-			" on kernel command line\n");
-		goto out;
-	}
-
-	for (drive = 0 ; drive < NR_HD ; drive++) {
-		struct gendisk *disk = alloc_disk(64);
-		struct hd_i_struct *p = &hd_info[drive];
-		if (!disk)
-			goto Enomem;
-		disk->major = HD_MAJOR;
-		disk->first_minor = drive << 6;
-		disk->fops = &hd_fops;
-		sprintf(disk->disk_name, "hd%c", 'a'+drive);
-		disk->private_data = p;
-		set_capacity(disk, p->head * p->sect * p->cyl);
-		disk->queue = hd_queue;
-		p->unit = drive;
-		hd_gendisk[drive] = disk;
-		printk("%s: %luMB, CHS=%d/%d/%d\n",
-			disk->disk_name, (unsigned long)get_capacity(disk)/2048,
-			p->cyl, p->head, p->sect);
-	}
-
-	if (request_irq(HD_IRQ, hd_interrupt, 0, "hd", NULL)) {
-		printk("hd: unable to get IRQ%d for the hard disk driver\n",
-			HD_IRQ);
-		goto out1;
-	}
-	if (!request_region(HD_DATA, 8, "hd")) {
-		printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA);
-		goto out2;
-	}
-	if (!request_region(HD_CMD, 1, "hd(cmd)")) {
-		printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD);
-		goto out3;
-	}
-
-	/* Let them fly */
-	for (drive = 0; drive < NR_HD; drive++)
-		add_disk(hd_gendisk[drive]);
-
-	return 0;
-
-out3:
-	release_region(HD_DATA, 8);
-out2:
-	free_irq(HD_IRQ, NULL);
-out1:
-	for (drive = 0; drive < NR_HD; drive++)
-		put_disk(hd_gendisk[drive]);
-	NR_HD = 0;
-out:
-	del_timer(&device_timer);
-	unregister_blkdev(HD_MAJOR, "hd");
-	blk_cleanup_queue(hd_queue);
-	return -1;
-Enomem:
-	while (drive--)
-		put_disk(hd_gendisk[drive]);
-	goto out;
-}
-
-static int __init parse_hd_setup(char *line)
-{
-	int ints[6];
-
-	(void) get_options(line, ARRAY_SIZE(ints), ints);
-	hd_setup(NULL, ints);
-
-	return 1;
-}
-__setup("hd=", parse_hd_setup);
-
-late_initcall(hd_init);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0ecb6461ed81..994403efee19 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
 	return ret;
 }
 
-static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
+static void lo_complete_rq(struct request *rq)
 {
-	if (bytes < 0 || op_is_write(req_op(cmd->rq)))
-		return;
+	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
+	if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio &&
+		     cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) {
 		struct bio *bio = cmd->rq->bio;
 
-		bio_advance(bio, bytes);
+		bio_advance(bio, cmd->ret);
 		zero_fill_bio(bio);
 	}
+
+	blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
-	struct request *rq = cmd->rq;
-
-	handle_partial_read(cmd, ret);
 
-	if (ret > 0)
-		ret = 0;
-	else if (ret < 0)
-		ret = -EIO;
-
-	blk_mq_complete_request(rq, ret);
+	cmd->ret = ret;
+	blk_mq_complete_request(cmd->rq);
 }
 
 static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -528,6 +523,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 	case REQ_OP_FLUSH:
 		return lo_req_flush(lo, rq);
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		return lo_discard(lo, rq, pos);
 	case REQ_OP_WRITE:
 		if (lo->transfer)
@@ -826,7 +822,7 @@ static void loop_config_discard(struct loop_device *lo)
 		q->limits.discard_granularity = 0;
 		q->limits.discard_alignment = 0;
 		blk_queue_max_discard_sectors(q, 0);
-		q->limits.discard_zeroes_data = 0;
+		blk_queue_max_write_zeroes_sectors(q, 0);
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 		return;
 	}
@@ -834,7 +830,7 @@ static void loop_config_discard(struct loop_device *lo)
 	q->limits.discard_granularity = inode->i_sb->s_blocksize;
 	q->limits.discard_alignment = 0;
 	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	q->limits.discard_zeroes_data = 1;
+	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -1660,6 +1656,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	switch (req_op(cmd->rq)) {
 	case REQ_OP_FLUSH:
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		cmd->use_aio = false;
 		break;
 	default:
@@ -1686,8 +1683,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	ret = do_req_filebacked(lo, cmd->rq);
  failed:
 	/* complete non-aio request */
-	if (!cmd->use_aio || ret)
-		blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
+	if (!cmd->use_aio || ret) {
+		cmd->ret = ret ? -EIO : 0;
+		blk_mq_complete_request(cmd->rq);
+	}
 }
 
 static void loop_queue_work(struct kthread_work *work)
@@ -1710,9 +1709,10 @@ static int loop_init_request(void *data, struct request *rq,
 	return 0;
 }
 
-static struct blk_mq_ops loop_mq_ops = {
+static const struct blk_mq_ops loop_mq_ops = {
 	.queue_rq       = loop_queue_rq,
 	.init_request	= loop_init_request,
+	.complete	= lo_complete_rq,
 };
 
 static int loop_add(struct loop_device **l, int i)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fb2237c73e61..fecd3f97ef8c 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -70,6 +70,7 @@ struct loop_cmd {
 	struct request *rq;
 	struct list_head list;
 	bool use_aio;           /* use AIO interface to handle I/O */
+	long ret;
 	struct kiocb iocb;
 };
 
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
deleted file mode 100644
index 286f276f586e..000000000000
--- a/drivers/block/mg_disk.c
+++ /dev/null
@@ -1,1112 +0,0 @@
-/*
- *  drivers/block/mg_disk.c
- *
- *  Support for the mGine m[g]flash IO mode.
- *  Based on legacy hd.c
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/hdreg.h>
-#include <linux/ata.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/mg_disk.h>
-#include <linux/slab.h>
-
-#define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
-
-/* name for block device */
-#define MG_DISK_NAME "mgd"
-
-#define MG_DISK_MAJ 0
-#define MG_DISK_MAX_PART 16
-#define MG_SECTOR_SIZE 512
-#define MG_MAX_SECTS 256
-
-/* Register offsets */
-#define MG_BUFF_OFFSET			0x8000
-#define MG_REG_OFFSET			0xC000
-#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
-#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
-#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
-#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
-#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
-#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
-#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
-#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
-#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
-#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
-#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
-
-/* handy status */
-#define MG_STAT_READY	(ATA_DRDY | ATA_DSC)
-#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \
-				 ATA_ERR))) == MG_STAT_READY)
-
-/* error code for others */
-#define MG_ERR_NONE		0
-#define MG_ERR_TIMEOUT		0x100
-#define MG_ERR_INIT_STAT	0x101
-#define MG_ERR_TRANSLATION	0x102
-#define MG_ERR_CTRL_RST		0x103
-#define MG_ERR_INV_STAT		0x104
-#define MG_ERR_RSTOUT		0x105
-
-#define MG_MAX_ERRORS	6	/* Max read/write errors */
-
-/* command */
-#define MG_CMD_RD 0x20
-#define MG_CMD_WR 0x30
-#define MG_CMD_SLEEP 0x99
-#define MG_CMD_WAKEUP 0xC3
-#define MG_CMD_ID 0xEC
-#define MG_CMD_WR_CONF 0x3C
-#define MG_CMD_RD_CONF 0x40
-
-/* operation mode */
-#define MG_OP_CASCADE (1 << 0)
-#define MG_OP_CASCADE_SYNC_RD (1 << 1)
-#define MG_OP_CASCADE_SYNC_WR (1 << 2)
-#define MG_OP_INTERLEAVE (1 << 3)
-
-/* synchronous */
-#define MG_BURST_LAT_4 (3 << 4)
-#define MG_BURST_LAT_5 (4 << 4)
-#define MG_BURST_LAT_6 (5 << 4)
-#define MG_BURST_LAT_7 (6 << 4)
-#define MG_BURST_LAT_8 (7 << 4)
-#define MG_BURST_LEN_4 (1 << 1)
-#define MG_BURST_LEN_8 (2 << 1)
-#define MG_BURST_LEN_16 (3 << 1)
-#define MG_BURST_LEN_32 (4 << 1)
-#define MG_BURST_LEN_CONT (0 << 1)
-
-/* timeout value (unit: ms) */
-#define MG_TMAX_CONF_TO_CMD	1
-#define MG_TMAX_WAIT_RD_DRQ	10
-#define MG_TMAX_WAIT_WR_DRQ	500
-#define MG_TMAX_RST_TO_BUSY	10
-#define MG_TMAX_HDRST_TO_RDY	500
-#define MG_TMAX_SWRST_TO_RDY	500
-#define MG_TMAX_RSTOUT		3000
-
-#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
-
-/* main structure for mflash driver */
-struct mg_host {
-	struct device *dev;
-
-	struct request_queue *breq;
-	struct request *req;
-	spinlock_t lock;
-	struct gendisk *gd;
-
-	struct timer_list timer;
-	void (*mg_do_intr) (struct mg_host *);
-
-	u16 id[ATA_ID_WORDS];
-
-	u16 cyls;
-	u16 heads;
-	u16 sectors;
-	u32 n_sectors;
-	u32 nres_sectors;
-
-	void __iomem *dev_base;
-	unsigned int irq;
-	unsigned int rst;
-	unsigned int rstout;
-
-	u32 major;
-	u32 error;
-};
-
-/*
- * Debugging macro and defines
- */
-#undef DO_MG_DEBUG
-#ifdef DO_MG_DEBUG
-#  define MG_DBG(fmt, args...) \
-	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
-#else /* CONFIG_MG_DEBUG */
-#  define MG_DBG(fmt, args...) do { } while (0)
-#endif /* CONFIG_MG_DEBUG */
-
-static void mg_request(struct request_queue *);
-
-static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes)
-{
-	if (__blk_end_request(host->req, err, nr_bytes))
-		return true;
-
-	host->req = NULL;
-	return false;
-}
-
-static bool mg_end_request_cur(struct mg_host *host, int err)
-{
-	return mg_end_request(host, err, blk_rq_cur_bytes(host->req));
-}
-
-static void mg_dump_status(const char *msg, unsigned int stat,
-		struct mg_host *host)
-{
-	char *name = MG_DISK_NAME;
-
-	if (host->req)
-		name = host->req->rq_disk->disk_name;
-
-	printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-	if (stat & ATA_BUSY)
-		printk("Busy ");
-	if (stat & ATA_DRDY)
-		printk("DriveReady ");
-	if (stat & ATA_DF)
-		printk("WriteFault ");
-	if (stat & ATA_DSC)
-		printk("SeekComplete ");
-	if (stat & ATA_DRQ)
-		printk("DataRequest ");
-	if (stat & ATA_CORR)
-		printk("CorrectedError ");
-	if (stat & ATA_ERR)
-		printk("Error ");
-	printk("}\n");
-	if ((stat & ATA_ERR) == 0) {
-		host->error = 0;
-	} else {
-		host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR);
-		printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg,
-				host->error & 0xff);
-		if (host->error & ATA_BBK)
-			printk("BadSector ");
-		if (host->error & ATA_UNC)
-			printk("UncorrectableError ");
-		if (host->error & ATA_IDNF)
-			printk("SectorIdNotFound ");
-		if (host->error & ATA_ABORTED)
-			printk("DriveStatusError ");
-		if (host->error & ATA_AMNF)
-			printk("AddrMarkNotFound ");
-		printk("}");
-		if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
-			if (host->req)
-				printk(", sector=%u",
-				       (unsigned int)blk_rq_pos(host->req));
-		}
-		printk("\n");
-	}
-}
-
-static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec)
-{
-	u8 status;
-	unsigned long expire, cur_jiffies;
-	struct mg_drv_data *prv_data = host->dev->platform_data;
-
-	host->error = MG_ERR_NONE;
-	expire = jiffies + msecs_to_jiffies(msec);
-
-	/* These 2 times dummy status read prevents reading invalid
-	 * status. A very little time (3 times of mflash operating clk)
-	 * is required for busy bit is set. Use dummy read instead of
-	 * busy wait, because mflash's PLL is machine dependent.
-	 */
-	if (prv_data->use_polling) {
-		status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-	}
-
-	status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-
-	do {
-		cur_jiffies = jiffies;
-		if (status & ATA_BUSY) {
-			if (expect == ATA_BUSY)
-				break;
-		} else {
-			/* Check the error condition! */
-			if (status & ATA_ERR) {
-				mg_dump_status("mg_wait", status, host);
-				break;
-			}
-
-			if (expect == MG_STAT_READY)
-				if (MG_READY_OK(status))
-					break;
-
-			if (expect == ATA_DRQ)
-				if (status & ATA_DRQ)
-					break;
-		}
-		if (!msec) {
-			mg_dump_status("not ready", status, host);
-			return MG_ERR_INV_STAT;
-		}
-
-		status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-	} while (time_before(cur_jiffies, expire));
-
-	if (time_after_eq(cur_jiffies, expire) && msec)
-		host->error = MG_ERR_TIMEOUT;
-
-	return host->error;
-}
-
-static unsigned int mg_wait_rstout(u32 rstout, u32 msec)
-{
-	unsigned long expire;
-
-	expire = jiffies + msecs_to_jiffies(msec);
-	while (time_before(jiffies, expire)) {
-		if (gpio_get_value(rstout) == 1)
-			return MG_ERR_NONE;
-		msleep(10);
-	}
-
-	return MG_ERR_RSTOUT;
-}
-
-static void mg_unexpected_intr(struct mg_host *host)
-{
-	u32 status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-
-	mg_dump_status("mg_unexpected_intr", status, host);
-}
-
-static irqreturn_t mg_irq(int irq, void *dev_id)
-{
-	struct mg_host *host = dev_id;
-	void (*handler)(struct mg_host *) = host->mg_do_intr;
-
-	spin_lock(&host->lock);
-
-	host->mg_do_intr = NULL;
-	del_timer(&host->timer);
-	if (!handler)
-		handler = mg_unexpected_intr;
-	handler(host);
-
-	spin_unlock(&host->lock);
-
-	return IRQ_HANDLED;
-}
-
-/* local copy of ata_id_string() */
-static void mg_id_string(const u16 *id, unsigned char *s,
-			 unsigned int ofs, unsigned int len)
-{
-	unsigned int c;
-
-	BUG_ON(len & 1);
-
-	while (len > 0) {
-		c = id[ofs] >> 8;
-		*s = c;
-		s++;
-
-		c = id[ofs] & 0xff;
-		*s = c;
-		s++;
-
-		ofs++;
-		len -= 2;
-	}
-}
-
-/* local copy of ata_id_c_string() */
-static void mg_id_c_string(const u16 *id, unsigned char *s,
-			   unsigned int ofs, unsigned int len)
-{
-	unsigned char *p;
-
-	mg_id_string(id, s, ofs, len - 1);
-
-	p = s + strnlen(s, len - 1);
-	while (p > s && p[-1] == ' ')
-		p--;
-	*p = '\0';
-}
-
-static int mg_get_disk_id(struct mg_host *host)
-{
-	u32 i;
-	s32 err;
-	const u16 *id = host->id;
-	struct mg_drv_data *prv_data = host->dev->platform_data;
-	char fwrev[ATA_ID_FW_REV_LEN + 1];
-	char model[ATA_ID_PROD_LEN + 1];
-	char serial[ATA_ID_SERNO_LEN + 1];
-
-	if (!prv_data->use_polling)
-		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-	outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ);
-	if (err)
-		return err;
-
-	for (i = 0; i < (MG_SECTOR_SIZE >> 1); i++)
-		host->id[i] = le16_to_cpu(inw((unsigned long)host->dev_base +
-					MG_BUFF_OFFSET + i * 2));
-
-	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	err = mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD);
-	if (err)
-		return err;
-
-	if ((id[ATA_ID_FIELD_VALID] & 1) == 0)
-		return MG_ERR_TRANSLATION;
-
-	host->n_sectors = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
-	host->cyls = id[ATA_ID_CYLS];
-	host->heads = id[ATA_ID_HEADS];
-	host->sectors = id[ATA_ID_SECTORS];
-
-	if (MG_RES_SEC && host->heads && host->sectors) {
-		/* modify cyls, n_sectors */
-		host->cyls = (host->n_sectors - MG_RES_SEC) /
-			host->heads / host->sectors;
-		host->nres_sectors = host->n_sectors - host->cyls *
-			host->heads * host->sectors;
-		host->n_sectors -= host->nres_sectors;
-	}
-
-	mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
-	mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
-	mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
-	printk(KERN_INFO "mg_disk: model: %s\n", model);
-	printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev);
-	printk(KERN_INFO "mg_disk: serial: %s\n", serial);
-	printk(KERN_INFO "mg_disk: %d + reserved %d sectors\n",
-			host->n_sectors, host->nres_sectors);
-
-	if (!prv_data->use_polling)
-		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-	return err;
-}
-
-
-static int mg_disk_init(struct mg_host *host)
-{
-	struct mg_drv_data *prv_data = host->dev->platform_data;
-	s32 err;
-	u8 init_status;
-
-	/* hdd rst low */
-	gpio_set_value(host->rst, 0);
-	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
-	if (err)
-		return err;
-
-	/* hdd rst high */
-	gpio_set_value(host->rst, 1);
-	err = mg_wait(host, MG_STAT_READY, MG_TMAX_HDRST_TO_RDY);
-	if (err)
-		return err;
-
-	/* soft reset on */
-	outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0),
-			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
-	if (err)
-		return err;
-
-	/* soft reset off */
-	outb(prv_data->use_polling ? ATA_NIEN : 0,
-			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-	err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY);
-	if (err)
-		return err;
-
-	init_status = inb((unsigned long)host->dev_base + MG_REG_STATUS) & 0xf;
-
-	if (init_status == 0xf)
-		return MG_ERR_INIT_STAT;
-
-	return err;
-}
-
-static void mg_bad_rw_intr(struct mg_host *host)
-{
-	if (host->req)
-		if (++host->req->errors >= MG_MAX_ERRORS ||
-		    host->error == MG_ERR_TIMEOUT)
-			mg_end_request_cur(host, -EIO);
-}
-
-static unsigned int mg_out(struct mg_host *host,
-		unsigned int sect_num,
-		unsigned int sect_cnt,
-		unsigned int cmd,
-		void (*intr_addr)(struct mg_host *))
-{
-	struct mg_drv_data *prv_data = host->dev->platform_data;
-
-	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-		return host->error;
-
-	if (!prv_data->use_polling) {
-		host->mg_do_intr = intr_addr;
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-	}
-	if (MG_RES_SEC)
-		sect_num += MG_RES_SEC;
-	outb((u8)sect_cnt, (unsigned long)host->dev_base + MG_REG_SECT_CNT);
-	outb((u8)sect_num, (unsigned long)host->dev_base + MG_REG_SECT_NUM);
-	outb((u8)(sect_num >> 8), (unsigned long)host->dev_base +
-			MG_REG_CYL_LOW);
-	outb((u8)(sect_num >> 16), (unsigned long)host->dev_base +
-			MG_REG_CYL_HIGH);
-	outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS),
-			(unsigned long)host->dev_base + MG_REG_DRV_HEAD);
-	outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	return MG_ERR_NONE;
-}
-
-static void mg_read_one(struct mg_host *host, struct request *req)
-{
-	u16 *buff = (u16 *)bio_data(req->bio);
-	u32 i;
-
-	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
-		*buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
-			      (i << 1));
-}
-
-static void mg_read(struct request *req)
-{
-	struct mg_host *host = req->rq_disk->private_data;
-
-	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
-		   MG_CMD_RD, NULL) != MG_ERR_NONE)
-		mg_bad_rw_intr(host);
-
-	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio));
-
-	do {
-		if (mg_wait(host, ATA_DRQ,
-			    MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
-			mg_bad_rw_intr(host);
-			return;
-		}
-
-		mg_read_one(host, req);
-
-		outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
-				MG_REG_COMMAND);
-	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
-}
-
-static void mg_write_one(struct mg_host *host, struct request *req)
-{
-	u16 *buff = (u16 *)bio_data(req->bio);
-	u32 i;
-
-	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
-		outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET +
-		     (i << 1));
-}
-
-static void mg_write(struct request *req)
-{
-	struct mg_host *host = req->rq_disk->private_data;
-	unsigned int rem = blk_rq_sectors(req);
-
-	if (mg_out(host, blk_rq_pos(req), rem,
-		   MG_CMD_WR, NULL) != MG_ERR_NONE) {
-		mg_bad_rw_intr(host);
-		return;
-	}
-
-	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       rem, blk_rq_pos(req), bio_data(req->bio));
-
-	if (mg_wait(host, ATA_DRQ,
-		    MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-		mg_bad_rw_intr(host);
-		return;
-	}
-
-	do {
-		mg_write_one(host, req);
-
-		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
-				MG_REG_COMMAND);
-
-		rem--;
-		if (rem > 1 && mg_wait(host, ATA_DRQ,
-					MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-			mg_bad_rw_intr(host);
-			return;
-		} else if (mg_wait(host, MG_STAT_READY,
-					MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
-			mg_bad_rw_intr(host);
-			return;
-		}
-	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
-}
-
-static void mg_read_intr(struct mg_host *host)
-{
-	struct request *req = host->req;
-	u32 i;
-
-	/* check status */
-	do {
-		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & ATA_BUSY)
-			break;
-		if (!MG_READY_OK(i))
-			break;
-		if (i & ATA_DRQ)
-			goto ok_to_read;
-	} while (0);
-	mg_dump_status("mg_read_intr", i, host);
-	mg_bad_rw_intr(host);
-	mg_request(host->breq);
-	return;
-
-ok_to_read:
-	mg_read_one(host, req);
-
-	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-	       blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio));
-
-	/* send read confirm */
-	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-
-	if (mg_end_request(host, 0, MG_SECTOR_SIZE)) {
-		/* set handler if read remains */
-		host->mg_do_intr = mg_read_intr;
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-	} else /* goto next request */
-		mg_request(host->breq);
-}
-
-static void mg_write_intr(struct mg_host *host)
-{
-	struct request *req = host->req;
-	u32 i;
-	bool rem;
-
-	/* check status */
-	do {
-		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & ATA_BUSY)
-			break;
-		if (!MG_READY_OK(i))
-			break;
-		if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
-			goto ok_to_write;
-	} while (0);
-	mg_dump_status("mg_write_intr", i, host);
-	mg_bad_rw_intr(host);
-	mg_request(host->breq);
-	return;
-
-ok_to_write:
-	if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) {
-		/* write 1 sector and set handler if remains */
-		mg_write_one(host, req);
-		MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-		       blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio));
-		host->mg_do_intr = mg_write_intr;
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-	}
-
-	/* send write confirm */
-	outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
-
-	if (!rem)
-		mg_request(host->breq);
-}
-
-static void mg_times_out(unsigned long data)
-{
-	struct mg_host *host = (struct mg_host *)data;
-	char *name;
-
-	spin_lock_irq(&host->lock);
-
-	if (!host->req)
-		goto out_unlock;
-
-	host->mg_do_intr = NULL;
-
-	name = host->req->rq_disk->disk_name;
-	printk(KERN_DEBUG "%s: timeout\n", name);
-
-	host->error = MG_ERR_TIMEOUT;
-	mg_bad_rw_intr(host);
-
-out_unlock:
-	mg_request(host->breq);
-	spin_unlock_irq(&host->lock);
-}
-
-static void mg_request_poll(struct request_queue *q)
-{
-	struct mg_host *host = q->queuedata;
-
-	while (1) {
-		if (!host->req) {
-			host->req = blk_fetch_request(q);
-			if (!host->req)
-				break;
-		}
-
-		switch (req_op(host->req)) {
-		case REQ_OP_READ:
-			mg_read(host->req);
-			break;
-		case REQ_OP_WRITE:
-			mg_write(host->req);
-			break;
-		default:
-			mg_end_request_cur(host, -EIO);
-			break;
-		}
-	}
-}
-
-static unsigned int mg_issue_req(struct request *req,
-		struct mg_host *host,
-		unsigned int sect_num,
-		unsigned int sect_cnt)
-{
-	switch (req_op(host->req)) {
-	case REQ_OP_READ:
-		if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr)
-				!= MG_ERR_NONE) {
-			mg_bad_rw_intr(host);
-			return host->error;
-		}
-		break;
-	case REQ_OP_WRITE:
-		/* TODO : handler */
-		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-		if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
-				!= MG_ERR_NONE) {
-			mg_bad_rw_intr(host);
-			return host->error;
-		}
-		del_timer(&host->timer);
-		mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ);
-		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-		if (host->error) {
-			mg_bad_rw_intr(host);
-			return host->error;
-		}
-		mg_write_one(host, req);
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
-				MG_REG_COMMAND);
-		break;
-	default:
-		mg_end_request_cur(host, -EIO);
-		break;
-	}
-	return MG_ERR_NONE;
-}
-
-/* This function also called from IRQ context */
-static void mg_request(struct request_queue *q)
-{
-	struct mg_host *host = q->queuedata;
-	struct request *req;
-	u32 sect_num, sect_cnt;
-
-	while (1) {
-		if (!host->req) {
-			host->req = blk_fetch_request(q);
-			if (!host->req)
-				break;
-		}
-		req = host->req;
-
-		/* check unwanted request call */
-		if (host->mg_do_intr)
-			return;
-
-		del_timer(&host->timer);
-
-		sect_num = blk_rq_pos(req);
-		/* deal whole segments */
-		sect_cnt = blk_rq_sectors(req);
-
-		/* sanity check */
-		if (sect_num >= get_capacity(req->rq_disk) ||
-				((sect_num + sect_cnt) >
-				 get_capacity(req->rq_disk))) {
-			printk(KERN_WARNING
-					"%s: bad access: sector=%d, count=%d\n",
-					req->rq_disk->disk_name,
-					sect_num, sect_cnt);
-			mg_end_request_cur(host, -EIO);
-			continue;
-		}
-
-		if (!mg_issue_req(req, host, sect_num, sect_cnt))
-			return;
-	}
-}
-
-static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct mg_host *host = bdev->bd_disk->private_data;
-
-	geo->cylinders = (unsigned short)host->cyls;
-	geo->heads = (unsigned char)host->heads;
-	geo->sectors = (unsigned char)host->sectors;
-	return 0;
-}
-
-static const struct block_device_operations mg_disk_ops = {
-	.getgeo = mg_getgeo
-};
-
-#ifdef CONFIG_PM_SLEEP
-static int mg_suspend(struct device *dev)
-{
-	struct mg_drv_data *prv_data = dev->platform_data;
-	struct mg_host *host = prv_data->host;
-
-	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-		return -EIO;
-
-	if (!prv_data->use_polling)
-		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-	outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	/* wait until mflash deep sleep */
-	msleep(1);
-
-	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) {
-		if (!prv_data->use_polling)
-			outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int mg_resume(struct device *dev)
-{
-	struct mg_drv_data *prv_data = dev->platform_data;
-	struct mg_host *host = prv_data->host;
-
-	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-		return -EIO;
-
-	outb(MG_CMD_WAKEUP, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	/* wait until mflash wakeup */
-	msleep(1);
-
-	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
-		return -EIO;
-
-	if (!prv_data->use_polling)
-		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-
-	return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume);
-
-static int mg_probe(struct platform_device *plat_dev)
-{
-	struct mg_host *host;
-	struct resource *rsc;
-	struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
-	int err = 0;
-
-	if (!prv_data) {
-		printk(KERN_ERR	"%s:%d fail (no driver_data)\n",
-				__func__, __LINE__);
-		err = -EINVAL;
-		goto probe_err;
-	}
-
-	/* alloc mg_host */
-	host = kzalloc(sizeof(struct mg_host), GFP_KERNEL);
-	if (!host) {
-		printk(KERN_ERR "%s:%d fail (no memory for mg_host)\n",
-				__func__, __LINE__);
-		err = -ENOMEM;
-		goto probe_err;
-	}
-	host->major = MG_DISK_MAJ;
-
-	/* link each other */
-	prv_data->host = host;
-	host->dev = &plat_dev->dev;
-
-	/* io remap */
-	rsc = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
-	if (!rsc) {
-		printk(KERN_ERR "%s:%d platform_get_resource fail\n",
-				__func__, __LINE__);
-		err = -EINVAL;
-		goto probe_err_2;
-	}
-	host->dev_base = ioremap(rsc->start, resource_size(rsc));
-	if (!host->dev_base) {
-		printk(KERN_ERR "%s:%d ioremap fail\n",
-				__func__, __LINE__);
-		err = -EIO;
-		goto probe_err_2;
-	}
-	MG_DBG("dev_base = 0x%x\n", (u32)host->dev_base);
-
-	/* get reset pin */
-	rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
-			MG_RST_PIN);
-	if (!rsc) {
-		printk(KERN_ERR "%s:%d get reset pin fail\n",
-				__func__, __LINE__);
-		err = -EIO;
-		goto probe_err_3;
-	}
-	host->rst = rsc->start;
-
-	/* init rst pin */
-	err = gpio_request(host->rst, MG_RST_PIN);
-	if (err)
-		goto probe_err_3;
-	gpio_direction_output(host->rst, 1);
-
-	/* reset out pin */
-	if (!(prv_data->dev_attr & MG_DEV_MASK)) {
-		err = -EINVAL;
-		goto probe_err_3a;
-	}
-
-	if (prv_data->dev_attr != MG_BOOT_DEV) {
-		rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
-				MG_RSTOUT_PIN);
-		if (!rsc) {
-			printk(KERN_ERR "%s:%d get reset-out pin fail\n",
-					__func__, __LINE__);
-			err = -EIO;
-			goto probe_err_3a;
-		}
-		host->rstout = rsc->start;
-		err = gpio_request(host->rstout, MG_RSTOUT_PIN);
-		if (err)
-			goto probe_err_3a;
-		gpio_direction_input(host->rstout);
-	}
-
-	/* disk reset */
-	if (prv_data->dev_attr == MG_STORAGE_DEV) {
-		/* If POR seq. not yet finished, wait */
-		err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
-		if (err)
-			goto probe_err_3b;
-		err = mg_disk_init(host);
-		if (err) {
-			printk(KERN_ERR "%s:%d fail (err code : %d)\n",
-					__func__, __LINE__, err);
-			err = -EIO;
-			goto probe_err_3b;
-		}
-	}
-
-	/* get irq resource */
-	if (!prv_data->use_polling) {
-		host->irq = platform_get_irq(plat_dev, 0);
-		if (host->irq == -ENXIO) {
-			err = host->irq;
-			goto probe_err_3b;
-		}
-		err = request_irq(host->irq, mg_irq,
-				IRQF_TRIGGER_RISING,
-				MG_DEV_NAME, host);
-		if (err) {
-			printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
-					__func__, __LINE__, err);
-			goto probe_err_3b;
-		}
-
-	}
-
-	/* get disk id */
-	err = mg_get_disk_id(host);
-	if (err) {
-		printk(KERN_ERR "%s:%d fail (err code : %d)\n",
-				__func__, __LINE__, err);
-		err = -EIO;
-		goto probe_err_4;
-	}
-
-	err = register_blkdev(host->major, MG_DISK_NAME);
-	if (err < 0) {
-		printk(KERN_ERR "%s:%d register_blkdev fail (err code : %d)\n",
-				__func__, __LINE__, err);
-		goto probe_err_4;
-	}
-	if (!host->major)
-		host->major = err;
-
-	spin_lock_init(&host->lock);
-
-	if (prv_data->use_polling)
-		host->breq = blk_init_queue(mg_request_poll, &host->lock);
-	else
-		host->breq = blk_init_queue(mg_request, &host->lock);
-
-	if (!host->breq) {
-		err = -ENOMEM;
-		printk(KERN_ERR "%s:%d (blk_init_queue) fail\n",
-				__func__, __LINE__);
-		goto probe_err_5;
-	}
-	host->breq->queuedata = host;
-
-	/* mflash is random device, thanx for the noop */
-	err = elevator_change(host->breq, "noop");
-	if (err) {
-		printk(KERN_ERR "%s:%d (elevator_init) fail\n",
-				__func__, __LINE__);
-		goto probe_err_6;
-	}
-	blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS);
-	blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
-
-	init_timer(&host->timer);
-	host->timer.function = mg_times_out;
-	host->timer.data = (unsigned long)host;
-
-	host->gd = alloc_disk(MG_DISK_MAX_PART);
-	if (!host->gd) {
-		printk(KERN_ERR "%s:%d (alloc_disk) fail\n",
-				__func__, __LINE__);
-		err = -ENOMEM;
-		goto probe_err_7;
-	}
-	host->gd->major = host->major;
-	host->gd->first_minor = 0;
-	host->gd->fops = &mg_disk_ops;
-	host->gd->queue = host->breq;
-	host->gd->private_data = host;
-	sprintf(host->gd->disk_name, MG_DISK_NAME"a");
-
-	set_capacity(host->gd, host->n_sectors);
-
-	add_disk(host->gd);
-
-	return err;
-
-probe_err_7:
-	del_timer_sync(&host->timer);
-probe_err_6:
-	blk_cleanup_queue(host->breq);
-probe_err_5:
-	unregister_blkdev(host->major, MG_DISK_NAME);
-probe_err_4:
-	if (!prv_data->use_polling)
-		free_irq(host->irq, host);
-probe_err_3b:
-	gpio_free(host->rstout);
-probe_err_3a:
-	gpio_free(host->rst);
-probe_err_3:
-	iounmap(host->dev_base);
-probe_err_2:
-	kfree(host);
-probe_err:
-	return err;
-}
-
-static int mg_remove(struct platform_device *plat_dev)
-{
-	struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
-	struct mg_host *host = prv_data->host;
-	int err = 0;
-
-	/* delete timer */
-	del_timer_sync(&host->timer);
-
-	/* remove disk */
-	if (host->gd) {
-		del_gendisk(host->gd);
-		put_disk(host->gd);
-	}
-	/* remove queue */
-	if (host->breq)
-		blk_cleanup_queue(host->breq);
-
-	/* unregister blk device */
-	unregister_blkdev(host->major, MG_DISK_NAME);
-
-	/* free irq */
-	if (!prv_data->use_polling)
-		free_irq(host->irq, host);
-
-	/* free reset-out pin */
-	if (prv_data->dev_attr != MG_BOOT_DEV)
-		gpio_free(host->rstout);
-
-	/* free rst pin */
-	if (host->rst)
-		gpio_free(host->rst);
-
-	/* unmap io */
-	if (host->dev_base)
-		iounmap(host->dev_base);
-
-	/* free mg_host */
-	kfree(host);
-
-	return err;
-}
-
-static struct platform_driver mg_disk_driver = {
-	.probe = mg_probe,
-	.remove = mg_remove,
-	.driver = {
-		.name = MG_DEV_NAME,
-		.pm = &mg_pm,
-	}
-};
-
-/****************************************************************************
- *
- * Module stuff
- *
- ****************************************************************************/
-
-static int __init mg_init(void)
-{
-	printk(KERN_INFO "mGine mflash driver, (c) 2008 mGine Co.\n");
-	return platform_driver_register(&mg_disk_driver);
-}
-
-static void __exit mg_exit(void)
-{
-	printk(KERN_INFO "mflash driver : bye bye\n");
-	platform_driver_unregister(&mg_disk_driver);
-}
-
-module_init(mg_init);
-module_exit(mg_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("unsik Kim <donari75@gmail.com>");
-MODULE_DESCRIPTION("mGine m[g]flash device driver");
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f96ab717534c..02804cc79d82 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
 	return false; /* device present */
 }
 
+/* we have to use runtime tag to setup command header */
+static void mtip_init_cmd_header(struct request *rq)
+{
+	struct driver_data *dd = rq->q->queuedata;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+	/* Point the command headers at the command tables. */
+	cmd->command_header = dd->port->command_list +
+				(sizeof(struct mtip_cmd_hdr) * rq->tag);
+	cmd->command_header_dma = dd->port->command_list_dma +
+				(sizeof(struct mtip_cmd_hdr) * rq->tag);
+
+	if (host_cap_64)
+		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
+
+	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+}
+
 static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 	struct request *rq;
@@ -180,6 +199,9 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 	if (IS_ERR(rq))
 		return NULL;
 
+	/* Internal cmd isn't submitted via .queue_rq */
+	mtip_init_cmd_header(rq);
+
 	return blk_mq_rq_to_pdu(rq);
 }
 
@@ -241,7 +263,8 @@ static void mtip_async_complete(struct mtip_port *port,
 
 	rq = mtip_rq_from_tag(dd, tag);
 
-	blk_mq_complete_request(rq, status);
+	cmd->status = status;
+	blk_mq_complete_request(rq);
 }
 
 /*
@@ -2910,18 +2933,19 @@ static void mtip_softirq_done_fn(struct request *rq)
 	if (unlikely(cmd->unaligned))
 		up(&dd->port->cmd_slot_unal);
 
-	blk_mq_end_request(rq, rq->errors);
+	blk_mq_end_request(rq, cmd->status);
 }
 
 static void mtip_abort_cmd(struct request *req, void *data,
 							bool reserved)
 {
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
-	req->errors = -EIO;
+	cmd->status = -EIO;
 	mtip_softirq_done_fn(req);
 }
 
@@ -3807,6 +3831,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *rq = bd->rq;
 	int ret;
 
+	mtip_init_cmd_header(rq);
+
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
@@ -3816,7 +3842,6 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
 
-	rq->errors = ret;
 	return BLK_MQ_RQ_QUEUE_ERROR;
 }
 
@@ -3838,7 +3863,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
 {
 	struct driver_data *dd = data;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
 	/*
 	 * For flush requests, request_idx starts at the end of the
@@ -3855,17 +3879,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
 
 	memset(cmd->command, 0, CMD_DMA_ALLOC_SZ);
 
-	/* Point the command headers at the command tables. */
-	cmd->command_header = dd->port->command_list +
-				(sizeof(struct mtip_cmd_hdr) * request_idx);
-	cmd->command_header_dma = dd->port->command_list_dma +
-				(sizeof(struct mtip_cmd_hdr) * request_idx);
-
-	if (host_cap_64)
-		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
-
-	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
-
 	sg_init_table(cmd->sg, MTIP_MAX_SG);
 	return 0;
 }
@@ -3889,7 +3902,7 @@ exit_handler:
 	return BLK_EH_RESET_TIMER;
 }
 
-static struct blk_mq_ops mtip_mq_ops = {
+static const struct blk_mq_ops mtip_mq_ops = {
 	.queue_rq	= mtip_queue_rq,
 	.init_request	= mtip_init_cmd,
 	.exit_request	= mtip_free_cmd,
@@ -3969,7 +3982,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	dd->tags.reserved_tags = 1;
 	dd->tags.cmd_size = sizeof(struct mtip_cmd);
 	dd->tags.numa_node = dd->numa_node;
-	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
+	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED;
 	dd->tags.driver_data = dd;
 	dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
 
@@ -4025,7 +4038,6 @@ skip_create_disk:
 		dd->queue->limits.discard_granularity = 4096;
 		blk_queue_max_discard_sectors(dd->queue,
 			MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
-		dd->queue->limits.discard_zeroes_data = 0;
 	}
 
 	/* Set the capacity of the device in 512 byte sectors. */
@@ -4107,9 +4119,11 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 	struct driver_data *dd = (struct driver_data *)data;
 	struct mtip_cmd *cmd;
 
-	if (likely(!reserv))
-		blk_mq_complete_request(rq, -ENODEV);
-	else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
+	if (likely(!reserv)) {
+		cmd = blk_mq_rq_to_pdu(rq);
+		cmd->status = -ENODEV;
+		blk_mq_complete_request(rq);
+	} else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
 
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		if (cmd->comp_func)
@@ -4162,7 +4176,7 @@ static int mtip_block_remove(struct driver_data *dd)
 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 						dd->disk->disk_name);
 
-	blk_mq_freeze_queue_start(dd->queue);
+	blk_freeze_queue_start(dd->queue);
 	blk_mq_stop_hw_queues(dd->queue);
 	blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
 
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 7617888f7944..57b41528a824 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -352,6 +352,7 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
+	int status;
 };
 
 /* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d8a23561b4cb..ac376b9b852d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -40,49 +40,82 @@
 #include <asm/types.h>
 
 #include <linux/nbd.h>
+#include <linux/nbd-netlink.h>
+#include <net/genetlink.h>
 
 static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
+static int nbd_total_devices = 0;
 
 struct nbd_sock {
 	struct socket *sock;
 	struct mutex tx_lock;
 	struct request *pending;
 	int sent;
+	bool dead;
+	int fallback_index;
+	int cookie;
+};
+
+struct recv_thread_args {
+	struct work_struct work;
+	struct nbd_device *nbd;
+	int index;
+};
+
+struct link_dead_args {
+	struct work_struct work;
+	int index;
 };
 
 #define NBD_TIMEDOUT			0
 #define NBD_DISCONNECT_REQUESTED	1
 #define NBD_DISCONNECTED		2
-#define NBD_RUNNING			3
+#define NBD_HAS_PID_FILE		3
+#define NBD_HAS_CONFIG_REF		4
+#define NBD_BOUND			5
+#define NBD_DESTROY_ON_DISCONNECT	6
 
-struct nbd_device {
+struct nbd_config {
 	u32 flags;
 	unsigned long runtime_flags;
-	struct nbd_sock **socks;
-	int magic;
+	u64 dead_conn_timeout;
 
-	struct blk_mq_tag_set tag_set;
-
-	struct mutex config_lock;
-	struct gendisk *disk;
+	struct nbd_sock **socks;
 	int num_connections;
+	atomic_t live_connections;
+	wait_queue_head_t conn_wait;
+
 	atomic_t recv_threads;
 	wait_queue_head_t recv_wq;
 	loff_t blksize;
 	loff_t bytesize;
-
-	struct task_struct *task_recv;
-	struct task_struct *task_setup;
-
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbg_dir;
 #endif
 };
 
+struct nbd_device {
+	struct blk_mq_tag_set tag_set;
+
+	int index;
+	refcount_t config_refs;
+	refcount_t refs;
+	struct nbd_config *config;
+	struct mutex config_lock;
+	struct gendisk *disk;
+
+	struct list_head list;
+	struct task_struct *task_recv;
+	struct task_struct *task_setup;
+};
+
 struct nbd_cmd {
 	struct nbd_device *nbd;
+	int index;
+	int cookie;
 	struct completion send_complete;
+	int status;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -100,18 +133,16 @@ static int part_shift;
 
 static int nbd_dev_dbg_init(struct nbd_device *nbd);
 static void nbd_dev_dbg_close(struct nbd_device *nbd);
-
+static void nbd_config_put(struct nbd_device *nbd);
+static void nbd_connect_reply(struct genl_info *info, int index);
+static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
+static void nbd_dead_link_work(struct work_struct *work);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
 }
 
-static bool nbd_is_connected(struct nbd_device *nbd)
-{
-	return !!nbd->task_recv;
-}
-
 static const char *nbdcmd_to_ascii(int cmd)
 {
 	switch (cmd) {
@@ -124,44 +155,104 @@ static const char *nbdcmd_to_ascii(int cmd)
 	return "invalid";
 }
 
-static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
+static ssize_t pid_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
 {
-	if (bdev->bd_openers <= 1)
-		bd_set_size(bdev, 0);
-	set_capacity(nbd->disk, 0);
-	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
+	struct gendisk *disk = dev_to_disk(dev);
+	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
 
-	return 0;
+	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+}
+
+static struct device_attribute pid_attr = {
+	.attr = { .name = "pid", .mode = S_IRUGO},
+	.show = pid_show,
+};
+
+static void nbd_dev_remove(struct nbd_device *nbd)
+{
+	struct gendisk *disk = nbd->disk;
+	if (disk) {
+		del_gendisk(disk);
+		blk_cleanup_queue(disk->queue);
+		blk_mq_free_tag_set(&nbd->tag_set);
+		disk->private_data = NULL;
+		put_disk(disk);
+	}
+	kfree(nbd);
+}
+
+static void nbd_put(struct nbd_device *nbd)
+{
+	if (refcount_dec_and_mutex_lock(&nbd->refs,
+					&nbd_index_mutex)) {
+		idr_remove(&nbd_index_idr, nbd->index);
+		mutex_unlock(&nbd_index_mutex);
+		nbd_dev_remove(nbd);
+	}
+}
+
+static int nbd_disconnected(struct nbd_config *config)
+{
+	return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
+		test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+}
+
+static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
+				int notify)
+{
+	if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
+		struct link_dead_args *args;
+		args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
+		if (args) {
+			INIT_WORK(&args->work, nbd_dead_link_work);
+			args->index = nbd->index;
+			queue_work(system_wq, &args->work);
+		}
+	}
+	if (!nsock->dead) {
+		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
+		atomic_dec(&nbd->config->live_connections);
+	}
+	nsock->dead = true;
+	nsock->pending = NULL;
+	nsock->sent = 0;
+}
+
+static void nbd_size_clear(struct nbd_device *nbd)
+{
+	if (nbd->config->bytesize) {
+		set_capacity(nbd->disk, 0);
+		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
+	}
 }
 
-static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_size_update(struct nbd_device *nbd)
 {
-	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
-	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
-	bd_set_size(bdev, nbd->bytesize);
-	set_capacity(nbd->disk, nbd->bytesize >> 9);
+	struct nbd_config *config = nbd->config;
+	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
+	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
+	set_capacity(nbd->disk, config->bytesize >> 9);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
-static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
-			loff_t blocksize, loff_t nr_blocks)
+static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
+			 loff_t nr_blocks)
 {
-	nbd->blksize = blocksize;
-	nbd->bytesize = blocksize * nr_blocks;
-	if (nbd_is_connected(nbd))
-		nbd_size_update(nbd, bdev);
+	struct nbd_config *config = nbd->config;
+	config->blksize = blocksize;
+	config->bytesize = blocksize * nr_blocks;
+	nbd_size_update(nbd);
 }
 
-static void nbd_end_request(struct nbd_cmd *cmd)
+static void nbd_complete_rq(struct request *req)
 {
-	struct nbd_device *nbd = cmd->nbd;
-	struct request *req = blk_mq_rq_from_pdu(cmd);
-	int error = req->errors ? -EIO : 0;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
-		error ? "failed" : "done");
+	dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd,
+		cmd->status ? "failed" : "done");
 
-	blk_mq_complete_request(req, error);
+	blk_mq_end_request(req, cmd->status);
 }
 
 /*
@@ -169,17 +260,18 @@ static void nbd_end_request(struct nbd_cmd *cmd)
  */
 static void sock_shutdown(struct nbd_device *nbd)
 {
+	struct nbd_config *config = nbd->config;
 	int i;
 
-	if (nbd->num_connections == 0)
+	if (config->num_connections == 0)
 		return;
-	if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
+	if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
 		return;
 
-	for (i = 0; i < nbd->num_connections; i++) {
-		struct nbd_sock *nsock = nbd->socks[i];
+	for (i = 0; i < config->num_connections; i++) {
+		struct nbd_sock *nsock = config->socks[i];
 		mutex_lock(&nsock->tx_lock);
-		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
+		nbd_mark_nsock_dead(nbd, nsock, 0);
 		mutex_unlock(&nsock->tx_lock);
 	}
 	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
@@ -190,14 +282,58 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct nbd_device *nbd = cmd->nbd;
+	struct nbd_config *config;
 
-	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
-	set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
-	req->errors = -EIO;
+	if (!refcount_inc_not_zero(&nbd->config_refs)) {
+		cmd->status = -EIO;
+		return BLK_EH_HANDLED;
+	}
 
-	mutex_lock(&nbd->config_lock);
+	/* If we are waiting on our dead timer then we could get timeout
+	 * callbacks for our request.  For this we just want to reset the timer
+	 * and let the queue side take care of everything.
+	 */
+	if (!completion_done(&cmd->send_complete)) {
+		nbd_config_put(nbd);
+		return BLK_EH_RESET_TIMER;
+	}
+	config = nbd->config;
+
+	if (config->num_connections > 1) {
+		dev_err_ratelimited(nbd_to_dev(nbd),
+				    "Connection timed out, retrying\n");
+		/*
+		 * Hooray we have more connections, requeue this IO, the submit
+		 * path will put it on a real connection.
+		 */
+		if (config->socks && config->num_connections > 1) {
+			if (cmd->index < config->num_connections) {
+				struct nbd_sock *nsock =
+					config->socks[cmd->index];
+				mutex_lock(&nsock->tx_lock);
+				/* We can have multiple outstanding requests, so
+				 * we don't want to mark the nsock dead if we've
+				 * already reconnected with a new socket, so
+				 * only mark it dead if its the same socket we
+				 * were sent out on.
+				 */
+				if (cmd->cookie == nsock->cookie)
+					nbd_mark_nsock_dead(nbd, nsock, 1);
+				mutex_unlock(&nsock->tx_lock);
+			}
+			blk_mq_requeue_request(req, true);
+			nbd_config_put(nbd);
+			return BLK_EH_NOT_HANDLED;
+		}
+	} else {
+		dev_err_ratelimited(nbd_to_dev(nbd),
+				    "Connection timed out\n");
+	}
+	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+	cmd->status = -EIO;
 	sock_shutdown(nbd);
-	mutex_unlock(&nbd->config_lock);
+	nbd_config_put(nbd);
+
 	return BLK_EH_HANDLED;
 }
 
@@ -207,7 +343,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 static int sock_xmit(struct nbd_device *nbd, int index, int send,
 		     struct iov_iter *iter, int msg_flags, int *sent)
 {
-	struct socket *sock = nbd->socks[index]->sock;
+	struct nbd_config *config = nbd->config;
+	struct socket *sock = config->socks[index]->sock;
 	int result;
 	struct msghdr msg;
 	unsigned long pflags = current->flags;
@@ -253,7 +390,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
-	struct nbd_sock *nsock = nbd->socks[index];
+	struct nbd_config *config = nbd->config;
+	struct nbd_sock *nsock = config->socks[index];
 	int result;
 	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
 	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
@@ -284,7 +422,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	}
 
 	if (rq_data_dir(req) == WRITE &&
-	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
+	    (config->flags & NBD_FLAG_READ_ONLY)) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 				    "Write on read-only\n");
 		return -EIO;
@@ -301,6 +439,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		}
 		iov_iter_advance(&from, sent);
 	}
+	cmd->index = index;
+	cmd->cookie = nsock->cookie;
 	request.type = htonl(type);
 	if (type != NBD_CMD_FLUSH) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -328,7 +468,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
-		return -EIO;
+		return -EAGAIN;
 	}
 send_pages:
 	if (type != NBD_CMD_WRITE)
@@ -370,7 +510,7 @@ send_pages:
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
 					result);
-				return -EIO;
+				return -EAGAIN;
 			}
 			/*
 			 * The completion might already have come in,
@@ -392,6 +532,7 @@ out:
 /* NULL returned = something went wrong, inform userspace */
 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 {
+	struct nbd_config *config = nbd->config;
 	int result;
 	struct nbd_reply reply;
 	struct nbd_cmd *cmd;
@@ -405,8 +546,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
 	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 	if (result <= 0) {
-		if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
-		    !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+		if (!nbd_disconnected(config))
 			dev_err(disk_to_dev(nbd->disk),
 				"Receive control failed (result %d)\n", result);
 		return ERR_PTR(result);
@@ -433,7 +573,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
-		req->errors = -EIO;
+		cmd->status = -EIO;
 		return cmd;
 	}
 
@@ -449,8 +589,19 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
 					result);
-				req->errors = -EIO;
-				return cmd;
+				/*
+				 * If we've disconnected or we only have 1
+				 * connection then we need to make sure we
+				 * complete this request, otherwise error out
+				 * and let the timeout stuff handle resubmitting
+				 * this request onto another connection.
+				 */
+				if (nbd_disconnected(config) ||
+				    config->num_connections <= 1) {
+					cmd->status = -EIO;
+					return cmd;
+				}
+				return ERR_PTR(-EIO);
 			}
 			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
 				cmd, bvec.bv_len);
@@ -462,54 +613,34 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	return cmd;
 }
 
-static ssize_t pid_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
-
-	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
-}
-
-static struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO},
-	.show = pid_show,
-};
-
-struct recv_thread_args {
-	struct work_struct work;
-	struct nbd_device *nbd;
-	int index;
-};
-
 static void recv_work(struct work_struct *work)
 {
 	struct recv_thread_args *args = container_of(work,
 						     struct recv_thread_args,
 						     work);
 	struct nbd_device *nbd = args->nbd;
+	struct nbd_config *config = nbd->config;
 	struct nbd_cmd *cmd;
 	int ret = 0;
 
-	BUG_ON(nbd->magic != NBD_MAGIC);
 	while (1) {
 		cmd = nbd_read_stat(nbd, args->index);
 		if (IS_ERR(cmd)) {
+			struct nbd_sock *nsock = config->socks[args->index];
+
+			mutex_lock(&nsock->tx_lock);
+			nbd_mark_nsock_dead(nbd, nsock, 1);
+			mutex_unlock(&nsock->tx_lock);
 			ret = PTR_ERR(cmd);
 			break;
 		}
 
-		nbd_end_request(cmd);
+		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
 	}
-
-	/*
-	 * We got an error, shut everybody down if this wasn't the result of a
-	 * disconnect request.
-	 */
-	if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-		sock_shutdown(nbd);
-	atomic_dec(&nbd->recv_threads);
-	wake_up(&nbd->recv_wq);
+	atomic_dec(&config->recv_threads);
+	wake_up(&config->recv_wq);
+	nbd_config_put(nbd);
+	kfree(args);
 }
 
 static void nbd_clear_req(struct request *req, void *data, bool reserved)
@@ -519,47 +650,119 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 	if (!blk_mq_request_started(req))
 		return;
 	cmd = blk_mq_rq_to_pdu(req);
-	req->errors = -EIO;
-	nbd_end_request(cmd);
+	cmd->status = -EIO;
+	blk_mq_complete_request(req);
 }
 
 static void nbd_clear_que(struct nbd_device *nbd)
 {
-	BUG_ON(nbd->magic != NBD_MAGIC);
-
+	blk_mq_stop_hw_queues(nbd->disk->queue);
 	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
+	blk_mq_start_hw_queues(nbd->disk->queue);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
+static int find_fallback(struct nbd_device *nbd, int index)
+{
+	struct nbd_config *config = nbd->config;
+	int new_index = -1;
+	struct nbd_sock *nsock = config->socks[index];
+	int fallback = nsock->fallback_index;
+
+	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+		return new_index;
+
+	if (config->num_connections <= 1) {
+		dev_err_ratelimited(disk_to_dev(nbd->disk),
+				    "Attempted send on invalid socket\n");
+		return new_index;
+	}
+
+	if (fallback >= 0 && fallback < config->num_connections &&
+	    !config->socks[fallback]->dead)
+		return fallback;
+
+	if (nsock->fallback_index < 0 ||
+	    nsock->fallback_index >= config->num_connections ||
+	    config->socks[nsock->fallback_index]->dead) {
+		int i;
+		for (i = 0; i < config->num_connections; i++) {
+			if (i == index)
+				continue;
+			if (!config->socks[i]->dead) {
+				new_index = i;
+				break;
+			}
+		}
+		nsock->fallback_index = new_index;
+		if (new_index < 0) {
+			dev_err_ratelimited(disk_to_dev(nbd->disk),
+					    "Dead connection, failed to find a fallback\n");
+			return new_index;
+		}
+	}
+	new_index = nsock->fallback_index;
+	return new_index;
+}
+
+static int wait_for_reconnect(struct nbd_device *nbd)
+{
+	struct nbd_config *config = nbd->config;
+	if (!config->dead_conn_timeout)
+		return 0;
+	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+		return 0;
+	wait_event_interruptible_timeout(config->conn_wait,
+					 atomic_read(&config->live_connections),
+					 config->dead_conn_timeout);
+	return atomic_read(&config->live_connections);
+}
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 	struct nbd_device *nbd = cmd->nbd;
+	struct nbd_config *config;
 	struct nbd_sock *nsock;
 	int ret;
 
-	if (index >= nbd->num_connections) {
+	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
-				    "Attempted send on invalid socket\n");
+				    "Socks array is empty\n");
 		return -EINVAL;
 	}
+	config = nbd->config;
 
-	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
+	if (index >= config->num_connections) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
-				    "Attempted send on closed socket\n");
+				    "Attempted send on invalid socket\n");
+		nbd_config_put(nbd);
 		return -EINVAL;
 	}
-
-	req->errors = 0;
-
-	nsock = nbd->socks[index];
+	cmd->status = 0;
+again:
+	nsock = config->socks[index];
 	mutex_lock(&nsock->tx_lock);
-	if (unlikely(!nsock->sock)) {
+	if (nsock->dead) {
+		int old_index = index;
+		index = find_fallback(nbd, index);
 		mutex_unlock(&nsock->tx_lock);
-		dev_err_ratelimited(disk_to_dev(nbd->disk),
-				    "Attempted send on closed socket\n");
-		return -EINVAL;
+		if (index < 0) {
+			if (wait_for_reconnect(nbd)) {
+				index = old_index;
+				goto again;
+			}
+			/* All the sockets should already be down at this point,
+			 * we just want to make sure that DISCONNECTED is set so
+			 * any requests that come in that were queue'ed waiting
+			 * for the reconnect timer don't trigger the timer again
+			 * and instead just error out.
+			 */
+			sock_shutdown(nbd);
+			nbd_config_put(nbd);
+			return -EIO;
+		}
+		goto again;
 	}
 
 	/* Handle the case that we have a pending request that was partially
@@ -572,9 +775,21 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 		ret = 0;
 		goto out;
 	}
+	/*
+	 * Some failures are related to the link going down, so anything that
+	 * returns EAGAIN can be retried on a different socket.
+	 */
 	ret = nbd_send_cmd(nbd, cmd, index);
+	if (ret == -EAGAIN) {
+		dev_err_ratelimited(disk_to_dev(nbd->disk),
+				    "Request send failed trying another connection\n");
+		nbd_mark_nsock_dead(nbd, nsock, 1);
+		mutex_unlock(&nsock->tx_lock);
+		goto again;
+	}
 out:
 	mutex_unlock(&nsock->tx_lock);
+	nbd_config_put(nbd);
 	return ret;
 }
 
@@ -611,9 +826,10 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
-			  unsigned long arg)
+static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
+			  bool netlink)
 {
+	struct nbd_config *config = nbd->config;
 	struct socket *sock;
 	struct nbd_sock **socks;
 	struct nbd_sock *nsock;
@@ -623,43 +839,107 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
 	if (!sock)
 		return err;
 
-	if (!nbd->task_setup)
+	if (!netlink && !nbd->task_setup &&
+	    !test_bit(NBD_BOUND, &config->runtime_flags))
 		nbd->task_setup = current;
-	if (nbd->task_setup != current) {
+
+	if (!netlink &&
+	    (nbd->task_setup != current ||
+	     test_bit(NBD_BOUND, &config->runtime_flags))) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Device being setup by another task");
-		return -EINVAL;
+		sockfd_put(sock);
+		return -EBUSY;
 	}
 
-	socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
+	socks = krealloc(config->socks, (config->num_connections + 1) *
 			 sizeof(struct nbd_sock *), GFP_KERNEL);
-	if (!socks)
+	if (!socks) {
+		sockfd_put(sock);
 		return -ENOMEM;
+	}
 	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
-	if (!nsock)
+	if (!nsock) {
+		sockfd_put(sock);
 		return -ENOMEM;
+	}
 
-	nbd->socks = socks;
+	config->socks = socks;
 
+	nsock->fallback_index = -1;
+	nsock->dead = false;
 	mutex_init(&nsock->tx_lock);
 	nsock->sock = sock;
 	nsock->pending = NULL;
 	nsock->sent = 0;
-	socks[nbd->num_connections++] = nsock;
+	nsock->cookie = 0;
+	socks[config->num_connections++] = nsock;
+	atomic_inc(&config->live_connections);
 
-	if (max_part)
-		bdev->bd_invalidated = 1;
 	return 0;
 }
 
+static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
+{
+	struct nbd_config *config = nbd->config;
+	struct socket *sock, *old;
+	struct recv_thread_args *args;
+	int i;
+	int err;
+
+	sock = sockfd_lookup(arg, &err);
+	if (!sock)
+		return err;
+
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args) {
+		sockfd_put(sock);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < config->num_connections; i++) {
+		struct nbd_sock *nsock = config->socks[i];
+
+		if (!nsock->dead)
+			continue;
+
+		mutex_lock(&nsock->tx_lock);
+		if (!nsock->dead) {
+			mutex_unlock(&nsock->tx_lock);
+			continue;
+		}
+		sk_set_memalloc(sock->sk);
+		atomic_inc(&config->recv_threads);
+		refcount_inc(&nbd->config_refs);
+		old = nsock->sock;
+		nsock->fallback_index = -1;
+		nsock->sock = sock;
+		nsock->dead = false;
+		INIT_WORK(&args->work, recv_work);
+		args->index = i;
+		args->nbd = nbd;
+		nsock->cookie++;
+		mutex_unlock(&nsock->tx_lock);
+		sockfd_put(old);
+
+		/* We take the tx_mutex in an error path in the recv_work, so we
+		 * need to queue_work outside of the tx_mutex.
+		 */
+		queue_work(recv_workqueue, &args->work);
+
+		atomic_inc(&config->live_connections);
+		wake_up(&config->conn_wait);
+		return 0;
+	}
+	sockfd_put(sock);
+	kfree(args);
+	return -ENOSPC;
+}
+
 /* Reset all properties of an NBD device */
 static void nbd_reset(struct nbd_device *nbd)
 {
-	nbd->runtime_flags = 0;
-	nbd->blksize = 1024;
-	nbd->bytesize = 0;
-	set_capacity(nbd->disk, 0);
-	nbd->flags = 0;
+	nbd->config = NULL;
 	nbd->tag_set.timeout = 0;
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 }
@@ -668,21 +948,23 @@ static void nbd_bdev_reset(struct block_device *bdev)
 {
 	if (bdev->bd_openers > 1)
 		return;
-	set_device_ro(bdev, false);
-	bdev->bd_inode->i_size = 0;
+	bd_set_size(bdev, 0);
 	if (max_part > 0) {
 		blkdev_reread_part(bdev);
 		bdev->bd_invalidated = 1;
 	}
 }
 
-static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_parse_flags(struct nbd_device *nbd)
 {
-	if (nbd->flags & NBD_FLAG_READ_ONLY)
-		set_device_ro(bdev, true);
-	if (nbd->flags & NBD_FLAG_SEND_TRIM)
+	struct nbd_config *config = nbd->config;
+	if (config->flags & NBD_FLAG_READ_ONLY)
+		set_disk_ro(nbd->disk, true);
+	else
+		set_disk_ro(nbd->disk, false);
+	if (config->flags & NBD_FLAG_SEND_TRIM)
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-	if (nbd->flags & NBD_FLAG_SEND_FLUSH)
+	if (config->flags & NBD_FLAG_SEND_FLUSH)
 		blk_queue_write_cache(nbd->disk->queue, true, false);
 	else
 		blk_queue_write_cache(nbd->disk->queue, false, false);
@@ -690,6 +972,7 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
 
 static void send_disconnects(struct nbd_device *nbd)
 {
+	struct nbd_config *config = nbd->config;
 	struct nbd_request request = {
 		.magic = htonl(NBD_REQUEST_MAGIC),
 		.type = htonl(NBD_CMD_DISC),
@@ -698,7 +981,7 @@ static void send_disconnects(struct nbd_device *nbd)
 	struct iov_iter from;
 	int i, ret;
 
-	for (i = 0; i < nbd->num_connections; i++) {
+	for (i = 0; i < config->num_connections; i++) {
 		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
 		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
 		if (ret <= 0)
@@ -707,145 +990,162 @@ static void send_disconnects(struct nbd_device *nbd)
 	}
 }
 
-static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_disconnect(struct nbd_device *nbd)
 {
-	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-	if (!nbd->socks)
-		return -EINVAL;
-
-	mutex_unlock(&nbd->config_lock);
-	fsync_bdev(bdev);
-	mutex_lock(&nbd->config_lock);
-
-	/* Check again after getting mutex back.  */
-	if (!nbd->socks)
-		return -EINVAL;
+	struct nbd_config *config = nbd->config;
 
+	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
 	if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
-			      &nbd->runtime_flags))
+			      &config->runtime_flags))
 		send_disconnects(nbd);
 	return 0;
 }
 
-static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
+static void nbd_clear_sock(struct nbd_device *nbd)
 {
 	sock_shutdown(nbd);
 	nbd_clear_que(nbd);
+	nbd->task_setup = NULL;
+}
 
-	__invalidate_device(bdev, true);
-	nbd_bdev_reset(bdev);
-	/*
-	 * We want to give the run thread a chance to wait for everybody
-	 * to clean up and then do it's own cleanup.
-	 */
-	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
-	    nbd->num_connections) {
-		int i;
-
-		for (i = 0; i < nbd->num_connections; i++) {
-			sockfd_put(nbd->socks[i]->sock);
-			kfree(nbd->socks[i]);
+static void nbd_config_put(struct nbd_device *nbd)
+{
+	if (refcount_dec_and_mutex_lock(&nbd->config_refs,
+					&nbd->config_lock)) {
+		struct nbd_config *config = nbd->config;
+		nbd_dev_dbg_close(nbd);
+		nbd_size_clear(nbd);
+		if (test_and_clear_bit(NBD_HAS_PID_FILE,
+				       &config->runtime_flags))
+			device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+		nbd->task_recv = NULL;
+		nbd_clear_sock(nbd);
+		if (config->num_connections) {
+			int i;
+			for (i = 0; i < config->num_connections; i++) {
+				sockfd_put(config->socks[i]->sock);
+				kfree(config->socks[i]);
+			}
+			kfree(config->socks);
 		}
-		kfree(nbd->socks);
-		nbd->socks = NULL;
-		nbd->num_connections = 0;
-	}
-	nbd->task_setup = NULL;
+		nbd_reset(nbd);
 
-	return 0;
+		mutex_unlock(&nbd->config_lock);
+		nbd_put(nbd);
+		module_put(THIS_MODULE);
+	}
 }
 
-static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device(struct nbd_device *nbd)
 {
-	struct recv_thread_args *args;
-	int num_connections = nbd->num_connections;
+	struct nbd_config *config = nbd->config;
+	int num_connections = config->num_connections;
 	int error = 0, i;
 
 	if (nbd->task_recv)
 		return -EBUSY;
-	if (!nbd->socks)
+	if (!config->socks)
 		return -EINVAL;
 	if (num_connections > 1 &&
-	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+	    !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
 		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
-		error = -EINVAL;
-		goto out_err;
+		return -EINVAL;
 	}
 
-	set_bit(NBD_RUNNING, &nbd->runtime_flags);
-	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
-	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
-	if (!args) {
-		error = -ENOMEM;
-		goto out_err;
-	}
+	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
 	nbd->task_recv = current;
-	mutex_unlock(&nbd->config_lock);
 
-	nbd_parse_flags(nbd, bdev);
+	nbd_parse_flags(nbd);
 
 	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
 	if (error) {
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-		goto out_recv;
+		return error;
 	}
-
-	nbd_size_update(nbd, bdev);
+	set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
 
 	nbd_dev_dbg_init(nbd);
 	for (i = 0; i < num_connections; i++) {
-		sk_set_memalloc(nbd->socks[i]->sock->sk);
-		atomic_inc(&nbd->recv_threads);
-		INIT_WORK(&args[i].work, recv_work);
-		args[i].nbd = nbd;
-		args[i].index = i;
-		queue_work(recv_workqueue, &args[i].work);
-	}
-	wait_event_interruptible(nbd->recv_wq,
-				 atomic_read(&nbd->recv_threads) == 0);
-	for (i = 0; i < num_connections; i++)
-		flush_work(&args[i].work);
-	nbd_dev_dbg_close(nbd);
-	nbd_size_clear(nbd, bdev);
-	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
-	mutex_lock(&nbd->config_lock);
-	nbd->task_recv = NULL;
-out_err:
-	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
-	nbd_clear_sock(nbd, bdev);
+		struct recv_thread_args *args;
 
+		args = kzalloc(sizeof(*args), GFP_KERNEL);
+		if (!args) {
+			sock_shutdown(nbd);
+			return -ENOMEM;
+		}
+		sk_set_memalloc(config->socks[i]->sock->sk);
+		atomic_inc(&config->recv_threads);
+		refcount_inc(&nbd->config_refs);
+		INIT_WORK(&args->work, recv_work);
+		args->nbd = nbd;
+		args->index = i;
+		queue_work(recv_workqueue, &args->work);
+	}
+	return error;
+}
+
+static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+{
+	struct nbd_config *config = nbd->config;
+	int ret;
+
+	ret = nbd_start_device(nbd);
+	if (ret)
+		return ret;
+
+	bd_set_size(bdev, config->bytesize);
+	if (max_part)
+		bdev->bd_invalidated = 1;
+	mutex_unlock(&nbd->config_lock);
+	ret = wait_event_interruptible(config->recv_wq,
+					 atomic_read(&config->recv_threads) == 0);
+	if (ret)
+		sock_shutdown(nbd);
+	mutex_lock(&nbd->config_lock);
+	bd_set_size(bdev, 0);
 	/* user requested, ignore socket errors */
-	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-		error = 0;
-	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
-		error = -ETIMEDOUT;
+	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+		ret = 0;
+	if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+		ret = -ETIMEDOUT;
+	return ret;
+}
 
-	nbd_reset(nbd);
-	return error;
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
+				 struct block_device *bdev)
+{
+	sock_shutdown(nbd);
+	kill_bdev(bdev);
+	nbd_bdev_reset(bdev);
+	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+			       &nbd->config->runtime_flags))
+		nbd_config_put(nbd);
 }
 
 /* Must be called with config_lock held */
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
+	struct nbd_config *config = nbd->config;
+
 	switch (cmd) {
 	case NBD_DISCONNECT:
-		return nbd_disconnect(nbd, bdev);
+		return nbd_disconnect(nbd);
 	case NBD_CLEAR_SOCK:
-		return nbd_clear_sock(nbd, bdev);
+		nbd_clear_sock_ioctl(nbd, bdev);
+		return 0;
 	case NBD_SET_SOCK:
-		return nbd_add_socket(nbd, bdev, arg);
+		return nbd_add_socket(nbd, arg, false);
 	case NBD_SET_BLKSIZE:
-		nbd_size_set(nbd, bdev, arg,
-			     div_s64(nbd->bytesize, arg));
+		nbd_size_set(nbd, arg,
+			     div_s64(config->bytesize, arg));
 		return 0;
 	case NBD_SET_SIZE:
-		nbd_size_set(nbd, bdev, nbd->blksize,
-			     div_s64(arg, nbd->blksize));
+		nbd_size_set(nbd, config->blksize,
+			     div_s64(arg, config->blksize));
 		return 0;
 	case NBD_SET_SIZE_BLOCKS:
-		nbd_size_set(nbd, bdev, nbd->blksize, arg);
+		nbd_size_set(nbd, config->blksize, arg);
 		return 0;
 	case NBD_SET_TIMEOUT:
 		if (arg) {
@@ -855,10 +1155,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		return 0;
 
 	case NBD_SET_FLAGS:
-		nbd->flags = arg;
+		config->flags = arg;
 		return 0;
 	case NBD_DO_IT:
-		return nbd_start_device(nbd, bdev);
+		return nbd_start_device_ioctl(nbd, bdev);
 	case NBD_CLEAR_QUE:
 		/*
 		 * This is for compatibility only.  The queue is always cleared
@@ -879,23 +1179,92 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 		     unsigned int cmd, unsigned long arg)
 {
 	struct nbd_device *nbd = bdev->bd_disk->private_data;
-	int error;
+	struct nbd_config *config = nbd->config;
+	int error = -EINVAL;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	BUG_ON(nbd->magic != NBD_MAGIC);
-
 	mutex_lock(&nbd->config_lock);
-	error = __nbd_ioctl(bdev, nbd, cmd, arg);
-	mutex_unlock(&nbd->config_lock);
 
+	/* Don't allow ioctl operations on a nbd device that was created with
+	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
+	 */
+	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	    (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
+		error = __nbd_ioctl(bdev, nbd, cmd, arg);
+	else
+		dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
+	mutex_unlock(&nbd->config_lock);
 	return error;
 }
 
+static struct nbd_config *nbd_alloc_config(void)
+{
+	struct nbd_config *config;
+
+	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
+	if (!config)
+		return NULL;
+	atomic_set(&config->recv_threads, 0);
+	init_waitqueue_head(&config->recv_wq);
+	init_waitqueue_head(&config->conn_wait);
+	config->blksize = 1024;
+	atomic_set(&config->live_connections, 0);
+	try_module_get(THIS_MODULE);
+	return config;
+}
+
+static int nbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nbd_device *nbd;
+	int ret = 0;
+
+	mutex_lock(&nbd_index_mutex);
+	nbd = bdev->bd_disk->private_data;
+	if (!nbd) {
+		ret = -ENXIO;
+		goto out;
+	}
+	if (!refcount_inc_not_zero(&nbd->refs)) {
+		ret = -ENXIO;
+		goto out;
+	}
+	if (!refcount_inc_not_zero(&nbd->config_refs)) {
+		struct nbd_config *config;
+
+		mutex_lock(&nbd->config_lock);
+		if (refcount_inc_not_zero(&nbd->config_refs)) {
+			mutex_unlock(&nbd->config_lock);
+			goto out;
+		}
+		config = nbd->config = nbd_alloc_config();
+		if (!config) {
+			ret = -ENOMEM;
+			mutex_unlock(&nbd->config_lock);
+			goto out;
+		}
+		refcount_set(&nbd->config_refs, 1);
+		refcount_inc(&nbd->refs);
+		mutex_unlock(&nbd->config_lock);
+	}
+out:
+	mutex_unlock(&nbd_index_mutex);
+	return ret;
+}
+
+static void nbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct nbd_device *nbd = disk->private_data;
+	nbd_config_put(nbd);
+	nbd_put(nbd);
+}
+
 static const struct block_device_operations nbd_fops =
 {
 	.owner =	THIS_MODULE,
+	.open =		nbd_open,
+	.release =	nbd_release,
 	.ioctl =	nbd_ioctl,
 	.compat_ioctl =	nbd_ioctl,
 };
@@ -927,7 +1296,7 @@ static const struct file_operations nbd_dbg_tasks_ops = {
 static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
 {
 	struct nbd_device *nbd = s->private;
-	u32 flags = nbd->flags;
+	u32 flags = nbd->config->flags;
 
 	seq_printf(s, "Hex: 0x%08x\n\n", flags);
 
@@ -960,6 +1329,7 @@ static const struct file_operations nbd_dbg_flags_ops = {
 static int nbd_dev_dbg_init(struct nbd_device *nbd)
 {
 	struct dentry *dir;
+	struct nbd_config *config = nbd->config;
 
 	if (!nbd_dbg_dir)
 		return -EIO;
@@ -970,12 +1340,12 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
 			nbd_name(nbd));
 		return -EIO;
 	}
-	nbd->dbg_dir = dir;
+	config->dbg_dir = dir;
 
 	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
-	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
+	debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
 	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
-	debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
+	debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
 	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
 
 	return 0;
@@ -983,7 +1353,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
 
 static void nbd_dev_dbg_close(struct nbd_device *nbd)
 {
-	debugfs_remove_recursive(nbd->dbg_dir);
+	debugfs_remove_recursive(nbd->config->dbg_dir);
 }
 
 static int nbd_dbg_init(void)
@@ -1035,25 +1405,13 @@ static int nbd_init_request(void *data, struct request *rq,
 	return 0;
 }
 
-static struct blk_mq_ops nbd_mq_ops = {
+static const struct blk_mq_ops nbd_mq_ops = {
 	.queue_rq	= nbd_queue_rq,
+	.complete	= nbd_complete_rq,
 	.init_request	= nbd_init_request,
 	.timeout	= nbd_xmit_timeout,
 };
 
-static void nbd_dev_remove(struct nbd_device *nbd)
-{
-	struct gendisk *disk = nbd->disk;
-	nbd->magic = 0;
-	if (disk) {
-		del_gendisk(disk);
-		blk_cleanup_queue(disk->queue);
-		blk_mq_free_tag_set(&nbd->tag_set);
-		put_disk(disk);
-	}
-	kfree(nbd);
-}
-
 static int nbd_dev_add(int index)
 {
 	struct nbd_device *nbd;
@@ -1082,6 +1440,7 @@ static int nbd_dev_add(int index)
 	if (err < 0)
 		goto out_free_disk;
 
+	nbd->index = index;
 	nbd->disk = disk;
 	nbd->tag_set.ops = &nbd_mq_ops;
 	nbd->tag_set.nr_hw_queues = 1;
@@ -1110,20 +1469,23 @@ static int nbd_dev_add(int index)
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
 	disk->queue->limits.discard_granularity = 512;
 	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
-	disk->queue->limits.discard_zeroes_data = 0;
+	blk_queue_max_segment_size(disk->queue, UINT_MAX);
+	blk_queue_max_segments(disk->queue, USHRT_MAX);
 	blk_queue_max_hw_sectors(disk->queue, 65536);
 	disk->queue->limits.max_sectors = 256;
 
-	nbd->magic = NBD_MAGIC;
 	mutex_init(&nbd->config_lock);
+	refcount_set(&nbd->config_refs, 0);
+	refcount_set(&nbd->refs, 1);
+	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
 	disk->first_minor = index << part_shift;
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
-	init_waitqueue_head(&nbd->recv_wq);
 	nbd_reset(nbd);
 	add_disk(disk);
+	nbd_total_devices++;
 	return index;
 
 out_free_tags:
@@ -1138,10 +1500,535 @@ out:
 	return err;
 }
 
-/*
- * And here should be modules and kernel interface 
- *  (Just smiley confuses emacs :-)
+static int find_free_cb(int id, void *ptr, void *data)
+{
+	struct nbd_device *nbd = ptr;
+	struct nbd_device **found = data;
+
+	if (!refcount_read(&nbd->config_refs)) {
+		*found = nbd;
+		return 1;
+	}
+	return 0;
+}
+
+/* Netlink interface. */
+static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
+	[NBD_ATTR_INDEX]		=	{ .type = NLA_U32 },
+	[NBD_ATTR_SIZE_BYTES]		=	{ .type = NLA_U64 },
+	[NBD_ATTR_BLOCK_SIZE_BYTES]	=	{ .type = NLA_U64 },
+	[NBD_ATTR_TIMEOUT]		=	{ .type = NLA_U64 },
+	[NBD_ATTR_SERVER_FLAGS]		=	{ .type = NLA_U64 },
+	[NBD_ATTR_CLIENT_FLAGS]		=	{ .type = NLA_U64 },
+	[NBD_ATTR_SOCKETS]		=	{ .type = NLA_NESTED},
+	[NBD_ATTR_DEAD_CONN_TIMEOUT]	=	{ .type = NLA_U64 },
+	[NBD_ATTR_DEVICE_LIST]		=	{ .type = NLA_NESTED},
+};
+
+static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
+	[NBD_SOCK_FD]			=	{ .type = NLA_U32 },
+};
+
+/* We don't use this right now since we don't parse the incoming list, but we
+ * still want it here so userspace knows what to expect.
  */
+static struct nla_policy __attribute__((unused))
+nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
+	[NBD_DEVICE_INDEX]		=	{ .type = NLA_U32 },
+	[NBD_DEVICE_CONNECTED]		=	{ .type = NLA_U8 },
+};
+
+static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nbd_device *nbd = NULL;
+	struct nbd_config *config;
+	int index = -1;
+	int ret;
+	bool put_dev = false;
+
+	if (!netlink_capable(skb, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (info->attrs[NBD_ATTR_INDEX])
+		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+	if (!info->attrs[NBD_ATTR_SOCKETS]) {
+		printk(KERN_ERR "nbd: must specify at least one socket\n");
+		return -EINVAL;
+	}
+	if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
+		printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
+		return -EINVAL;
+	}
+again:
+	mutex_lock(&nbd_index_mutex);
+	if (index == -1) {
+		ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
+		if (ret == 0) {
+			int new_index;
+			new_index = nbd_dev_add(-1);
+			if (new_index < 0) {
+				mutex_unlock(&nbd_index_mutex);
+				printk(KERN_ERR "nbd: failed to add new device\n");
+				return ret;
+			}
+			nbd = idr_find(&nbd_index_idr, new_index);
+		}
+	} else {
+		nbd = idr_find(&nbd_index_idr, index);
+	}
+	if (!nbd) {
+		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
+		       index);
+		mutex_unlock(&nbd_index_mutex);
+		return -EINVAL;
+	}
+	if (!refcount_inc_not_zero(&nbd->refs)) {
+		mutex_unlock(&nbd_index_mutex);
+		if (index == -1)
+			goto again;
+		printk(KERN_ERR "nbd: device at index %d is going down\n",
+		       index);
+		return -EINVAL;
+	}
+	mutex_unlock(&nbd_index_mutex);
+
+	mutex_lock(&nbd->config_lock);
+	if (refcount_read(&nbd->config_refs)) {
+		mutex_unlock(&nbd->config_lock);
+		nbd_put(nbd);
+		if (index == -1)
+			goto again;
+		printk(KERN_ERR "nbd: nbd%d already in use\n", index);
+		return -EBUSY;
+	}
+	if (WARN_ON(nbd->config)) {
+		mutex_unlock(&nbd->config_lock);
+		nbd_put(nbd);
+		return -EINVAL;
+	}
+	config = nbd->config = nbd_alloc_config();
+	if (!nbd->config) {
+		mutex_unlock(&nbd->config_lock);
+		nbd_put(nbd);
+		printk(KERN_ERR "nbd: couldn't allocate config\n");
+		return -ENOMEM;
+	}
+	refcount_set(&nbd->config_refs, 1);
+	set_bit(NBD_BOUND, &config->runtime_flags);
+
+	if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
+		u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
+		nbd_size_set(nbd, config->blksize,
+			     div64_u64(bytes, config->blksize));
+	}
+	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+		u64 bsize =
+			nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
+		nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
+	}
+	if (info->attrs[NBD_ATTR_TIMEOUT]) {
+		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
+		nbd->tag_set.timeout = timeout * HZ;
+		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+	}
+	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
+		config->dead_conn_timeout =
+			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
+		config->dead_conn_timeout *= HZ;
+	}
+	if (info->attrs[NBD_ATTR_SERVER_FLAGS])
+		config->flags =
+			nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
+	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
+		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
+		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
+			set_bit(NBD_DESTROY_ON_DISCONNECT,
+				&config->runtime_flags);
+			put_dev = true;
+		}
+	}
+
+	if (info->attrs[NBD_ATTR_SOCKETS]) {
+		struct nlattr *attr;
+		int rem, fd;
+
+		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
+				    rem) {
+			struct nlattr *socks[NBD_SOCK_MAX+1];
+
+			if (nla_type(attr) != NBD_SOCK_ITEM) {
+				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
+					       nbd_sock_policy);
+			if (ret != 0) {
+				printk(KERN_ERR "nbd: error processing sock list\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			if (!socks[NBD_SOCK_FD])
+				continue;
+			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
+			ret = nbd_add_socket(nbd, fd, true);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = nbd_start_device(nbd);
+out:
+	mutex_unlock(&nbd->config_lock);
+	if (!ret) {
+		set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+		refcount_inc(&nbd->config_refs);
+		nbd_connect_reply(info, nbd->index);
+	}
+	nbd_config_put(nbd);
+	if (put_dev)
+		nbd_put(nbd);
+	return ret;
+}
+
+static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nbd_device *nbd;
+	int index;
+
+	if (!netlink_capable(skb, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!info->attrs[NBD_ATTR_INDEX]) {
+		printk(KERN_ERR "nbd: must specify an index to disconnect\n");
+		return -EINVAL;
+	}
+	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+	mutex_lock(&nbd_index_mutex);
+	nbd = idr_find(&nbd_index_idr, index);
+	if (!nbd) {
+		mutex_unlock(&nbd_index_mutex);
+		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
+		       index);
+		return -EINVAL;
+	}
+	if (!refcount_inc_not_zero(&nbd->refs)) {
+		mutex_unlock(&nbd_index_mutex);
+		printk(KERN_ERR "nbd: device at index %d is going down\n",
+		       index);
+		return -EINVAL;
+	}
+	mutex_unlock(&nbd_index_mutex);
+	if (!refcount_inc_not_zero(&nbd->config_refs)) {
+		nbd_put(nbd);
+		return 0;
+	}
+	mutex_lock(&nbd->config_lock);
+	nbd_disconnect(nbd);
+	mutex_unlock(&nbd->config_lock);
+	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+			       &nbd->config->runtime_flags))
+		nbd_config_put(nbd);
+	nbd_config_put(nbd);
+	nbd_put(nbd);
+	return 0;
+}
+
+static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nbd_device *nbd = NULL;
+	struct nbd_config *config;
+	int index;
+	int ret = -EINVAL;
+	bool put_dev = false;
+
+	if (!netlink_capable(skb, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!info->attrs[NBD_ATTR_INDEX]) {
+		printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
+		return -EINVAL;
+	}
+	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+	mutex_lock(&nbd_index_mutex);
+	nbd = idr_find(&nbd_index_idr, index);
+	if (!nbd) {
+		mutex_unlock(&nbd_index_mutex);
+		printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
+		       index);
+		return -EINVAL;
+	}
+	if (!refcount_inc_not_zero(&nbd->refs)) {
+		mutex_unlock(&nbd_index_mutex);
+		printk(KERN_ERR "nbd: device at index %d is going down\n",
+		       index);
+		return -EINVAL;
+	}
+	mutex_unlock(&nbd_index_mutex);
+
+	if (!refcount_inc_not_zero(&nbd->config_refs)) {
+		dev_err(nbd_to_dev(nbd),
+			"not configured, cannot reconfigure\n");
+		nbd_put(nbd);
+		return -EINVAL;
+	}
+
+	mutex_lock(&nbd->config_lock);
+	config = nbd->config;
+	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	    !nbd->task_recv) {
+		dev_err(nbd_to_dev(nbd),
+			"not configured, cannot reconfigure\n");
+		goto out;
+	}
+
+	if (info->attrs[NBD_ATTR_TIMEOUT]) {
+		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
+		nbd->tag_set.timeout = timeout * HZ;
+		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+	}
+	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
+		config->dead_conn_timeout =
+			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
+		config->dead_conn_timeout *= HZ;
+	}
+	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
+		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
+		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
+			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+					      &config->runtime_flags))
+				put_dev = true;
+		} else {
+			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+					       &config->runtime_flags))
+				refcount_inc(&nbd->refs);
+		}
+	}
+
+	if (info->attrs[NBD_ATTR_SOCKETS]) {
+		struct nlattr *attr;
+		int rem, fd;
+
+		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
+				    rem) {
+			struct nlattr *socks[NBD_SOCK_MAX+1];
+
+			if (nla_type(attr) != NBD_SOCK_ITEM) {
+				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
+					       nbd_sock_policy);
+			if (ret != 0) {
+				printk(KERN_ERR "nbd: error processing sock list\n");
+				ret = -EINVAL;
+				goto out;
+			}
+			if (!socks[NBD_SOCK_FD])
+				continue;
+			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
+			ret = nbd_reconnect_socket(nbd, fd);
+			if (ret) {
+				if (ret == -ENOSPC)
+					ret = 0;
+				goto out;
+			}
+			dev_info(nbd_to_dev(nbd), "reconnected socket\n");
+		}
+	}
+out:
+	mutex_unlock(&nbd->config_lock);
+	nbd_config_put(nbd);
+	nbd_put(nbd);
+	if (put_dev)
+		nbd_put(nbd);
+	return ret;
+}
+
+static const struct genl_ops nbd_connect_genl_ops[] = {
+	{
+		.cmd	= NBD_CMD_CONNECT,
+		.policy	= nbd_attr_policy,
+		.doit	= nbd_genl_connect,
+	},
+	{
+		.cmd	= NBD_CMD_DISCONNECT,
+		.policy	= nbd_attr_policy,
+		.doit	= nbd_genl_disconnect,
+	},
+	{
+		.cmd	= NBD_CMD_RECONFIGURE,
+		.policy	= nbd_attr_policy,
+		.doit	= nbd_genl_reconfigure,
+	},
+	{
+		.cmd	= NBD_CMD_STATUS,
+		.policy	= nbd_attr_policy,
+		.doit	= nbd_genl_status,
+	},
+};
+
+static const struct genl_multicast_group nbd_mcast_grps[] = {
+	{ .name = NBD_GENL_MCAST_GROUP_NAME, },
+};
+
+static struct genl_family nbd_genl_family __ro_after_init = {
+	.hdrsize	= 0,
+	.name		= NBD_GENL_FAMILY_NAME,
+	.version	= NBD_GENL_VERSION,
+	.module		= THIS_MODULE,
+	.ops		= nbd_connect_genl_ops,
+	.n_ops		= ARRAY_SIZE(nbd_connect_genl_ops),
+	.maxattr	= NBD_ATTR_MAX,
+	.mcgrps		= nbd_mcast_grps,
+	.n_mcgrps	= ARRAY_SIZE(nbd_mcast_grps),
+};
+
+static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
+{
+	struct nlattr *dev_opt;
+	u8 connected = 0;
+	int ret;
+
+	/* This is a little racey, but for status it's ok.  The
+	 * reason we don't take a ref here is because we can't
+	 * take a ref in the index == -1 case as we would need
+	 * to put under the nbd_index_mutex, which could
+	 * deadlock if we are configured to remove ourselves
+	 * once we're disconnected.
+	 */
+	if (refcount_read(&nbd->config_refs))
+		connected = 1;
+	dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
+	if (!dev_opt)
+		return -EMSGSIZE;
+	ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
+	if (ret)
+		return -EMSGSIZE;
+	ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
+			 connected);
+	if (ret)
+		return -EMSGSIZE;
+	nla_nest_end(reply, dev_opt);
+	return 0;
+}
+
+static int status_cb(int id, void *ptr, void *data)
+{
+	struct nbd_device *nbd = ptr;
+	return populate_nbd_status(nbd, (struct sk_buff *)data);
+}
+
+static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *dev_list;
+	struct sk_buff *reply;
+	void *reply_head;
+	size_t msg_size;
+	int index = -1;
+	int ret = -ENOMEM;
+
+	if (info->attrs[NBD_ATTR_INDEX])
+		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+
+	mutex_lock(&nbd_index_mutex);
+
+	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
+				  nla_attr_size(sizeof(u8)));
+	msg_size *= (index == -1) ? nbd_total_devices : 1;
+
+	reply = genlmsg_new(msg_size, GFP_KERNEL);
+	if (!reply)
+		goto out;
+	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
+				       NBD_CMD_STATUS);
+	if (!reply_head) {
+		nlmsg_free(reply);
+		goto out;
+	}
+
+	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
+	if (index == -1) {
+		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
+		if (ret) {
+			nlmsg_free(reply);
+			goto out;
+		}
+	} else {
+		struct nbd_device *nbd;
+		nbd = idr_find(&nbd_index_idr, index);
+		if (nbd) {
+			ret = populate_nbd_status(nbd, reply);
+			if (ret) {
+				nlmsg_free(reply);
+				goto out;
+			}
+		}
+	}
+	nla_nest_end(reply, dev_list);
+	genlmsg_end(reply, reply_head);
+	genlmsg_reply(reply, info);
+	ret = 0;
+out:
+	mutex_unlock(&nbd_index_mutex);
+	return ret;
+}
+
+static void nbd_connect_reply(struct genl_info *info, int index)
+{
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+
+	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
+	if (!skb)
+		return;
+	msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
+				     NBD_CMD_CONNECT);
+	if (!msg_head) {
+		nlmsg_free(skb);
+		return;
+	}
+	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
+	if (ret) {
+		nlmsg_free(skb);
+		return;
+	}
+	genlmsg_end(skb, msg_head);
+	genlmsg_reply(skb, info);
+}
+
+static void nbd_mcast_index(int index)
+{
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+
+	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
+	if (!skb)
+		return;
+	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
+				     NBD_CMD_LINK_DEAD);
+	if (!msg_head) {
+		nlmsg_free(skb);
+		return;
+	}
+	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
+	if (ret) {
+		nlmsg_free(skb);
+		return;
+	}
+	genlmsg_end(skb, msg_head);
+	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
+}
+
+static void nbd_dead_link_work(struct work_struct *work)
+{
+	struct link_dead_args *args = container_of(work, struct link_dead_args,
+						   work);
+	nbd_mcast_index(args->index);
+	kfree(args);
+}
 
 static int __init nbd_init(void)
 {
@@ -1184,6 +2071,11 @@ static int __init nbd_init(void)
 		return -EIO;
 	}
 
+	if (genl_register_family(&nbd_genl_family)) {
+		unregister_blkdev(NBD_MAJOR, "nbd");
+		destroy_workqueue(recv_workqueue);
+		return -EINVAL;
+	}
 	nbd_dbg_init();
 
 	mutex_lock(&nbd_index_mutex);
@@ -1195,17 +2087,34 @@ static int __init nbd_init(void)
 
 static int nbd_exit_cb(int id, void *ptr, void *data)
 {
+	struct list_head *list = (struct list_head *)data;
 	struct nbd_device *nbd = ptr;
-	nbd_dev_remove(nbd);
+
+	list_add_tail(&nbd->list, list);
 	return 0;
 }
 
 static void __exit nbd_cleanup(void)
 {
+	struct nbd_device *nbd;
+	LIST_HEAD(del_list);
+
 	nbd_dbg_close();
 
-	idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
+	mutex_lock(&nbd_index_mutex);
+	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
+	mutex_unlock(&nbd_index_mutex);
+
+	while (!list_empty(&del_list)) {
+		nbd = list_first_entry(&del_list, struct nbd_device, list);
+		list_del_init(&nbd->list);
+		if (refcount_read(&nbd->refs) != 1)
+			printk(KERN_ERR "nbd: possibly leaking a device\n");
+		nbd_put(nbd);
+	}
+
 	idr_destroy(&nbd_index_idr);
+	genl_unregister_family(&nbd_genl_family);
 	destroy_workqueue(recv_workqueue);
 	unregister_blkdev(NBD_MAJOR, "nbd");
 }
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6f2e565bccc5..d946e1eeac8e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -117,6 +117,10 @@ static bool use_lightnvm;
 module_param(use_lightnvm, bool, S_IRUGO);
 MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
 
+static bool blocking;
+module_param(blocking, bool, S_IRUGO);
+MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -277,7 +281,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_SOFTIRQ:
 		switch (queue_mode)  {
 		case NULL_Q_MQ:
-			blk_mq_complete_request(cmd->rq, cmd->rq->errors);
+			blk_mq_complete_request(cmd->rq);
 			break;
 		case NULL_Q_RQ:
 			blk_complete_request(cmd->rq);
@@ -357,6 +361,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
+	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
 	if (irqmode == NULL_IRQ_TIMER) {
 		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		cmd->timer.function = null_cmd_timer_expired;
@@ -392,7 +398,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	return 0;
 }
 
-static struct blk_mq_ops null_mq_ops = {
+static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
 	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
@@ -437,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
-	rq->__sector = bio->bi_iter.bi_sector;
-	rq->ioprio = bio_prio(bio);
-
-	if (bio_has_data(bio))
-		rq->nr_phys_segments = bio_phys_segments(q, bio);
-
-	rq->__data_len = bio->bi_iter.bi_size;
-	rq->bio = rq->biotail = bio;
+	blk_init_request_from_bio(rq, bio);
 
 	rq->end_io_data = rqd;
 
@@ -724,6 +723,9 @@ static int null_add_dev(void)
 		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 		nullb->tag_set.driver_data = nullb;
 
+		if (blocking)
+			nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
+
 		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
 		if (rv)
 			goto out_cleanup_queues;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
deleted file mode 100644
index 8127b8201a01..000000000000
--- a/drivers/block/osdblk.c
+++ /dev/null
@@ -1,693 +0,0 @@
-
-/*
-   osdblk.c -- Export a single SCSI OSD object as a Linux block device
-
-
-   Copyright 2009 Red Hat, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; see the file COPYING.  If not, write to
-   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-   Instructions for use
-   --------------------
-
-   1) Map a Linux block device to an existing OSD object.
-
-      In this example, we will use partition id 1234, object id 5678,
-      OSD device /dev/osd1.
-
-      $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
-
-
-   2) List all active blkdev<->object mappings.
-
-      In this example, we have performed step #1 twice, creating two blkdevs,
-      mapped to two separate OSD objects.
-
-      $ cat /sys/class/osdblk/list
-      0 174 1234 5678 /dev/osd1
-      1 179 1994 897123 /dev/osd0
-
-      The columns, in order, are:
-      - blkdev unique id
-      - blkdev assigned major
-      - OSD object partition id
-      - OSD object id
-      - OSD device
-
-
-   3) Remove an active blkdev<->object mapping.
-
-      In this example, we remove the mapping with blkdev unique id 1.
-
-      $ echo 1 > /sys/class/osdblk/remove
-
-
-   NOTE:  The actual creation and deletion of OSD objects is outside the scope
-   of this driver.
-
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <scsi/osd_initiator.h>
-#include <scsi/osd_attributes.h>
-#include <scsi/osd_sec.h>
-#include <scsi/scsi_device.h>
-
-#define DRV_NAME "osdblk"
-#define PFX DRV_NAME ": "
-
-/* #define _OSDBLK_DEBUG */
-#ifdef _OSDBLK_DEBUG
-#define OSDBLK_DEBUG(fmt, a...) \
-	printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
-#else
-#define OSDBLK_DEBUG(fmt, a...) \
-	do { if (0) printk(fmt, ##a); } while (0)
-#endif
-
-MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
-MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
-MODULE_LICENSE("GPL");
-
-struct osdblk_device;
-
-enum {
-	OSDBLK_MINORS_PER_MAJOR	= 256,		/* max minors per blkdev */
-	OSDBLK_MAX_REQ		= 32,		/* max parallel requests */
-	OSDBLK_OP_TIMEOUT	= 4 * 60,	/* sync OSD req timeout */
-};
-
-struct osdblk_request {
-	struct request		*rq;		/* blk layer request */
-	struct bio		*bio;		/* cloned bio */
-	struct osdblk_device	*osdev;		/* associated blkdev */
-};
-
-struct osdblk_device {
-	int			id;		/* blkdev unique id */
-
-	int			major;		/* blkdev assigned major */
-	struct gendisk		*disk;		/* blkdev's gendisk and rq */
-	struct request_queue	*q;
-
-	struct osd_dev		*osd;		/* associated OSD */
-
-	char			name[32];	/* blkdev name, e.g. osdblk34 */
-
-	spinlock_t		lock;		/* queue lock */
-
-	struct osd_obj_id	obj;		/* OSD partition, obj id */
-	uint8_t			obj_cred[OSD_CAP_LEN]; /* OSD cred */
-
-	struct osdblk_request	req[OSDBLK_MAX_REQ]; /* request table */
-
-	struct list_head	node;
-
-	char			osd_path[0];	/* OSD device path */
-};
-
-static struct class *class_osdblk;		/* /sys/class/osdblk */
-static DEFINE_MUTEX(ctl_mutex);	/* Serialize open/close/setup/teardown */
-static LIST_HEAD(osdblkdev_list);
-
-static const struct block_device_operations osdblk_bd_ops = {
-	.owner		= THIS_MODULE,
-};
-
-static const struct osd_attr g_attr_logical_length = ATTR_DEF(
-	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
-static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
-				   const struct osd_obj_id *obj)
-{
-	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
-
-/* copied from exofs; move to libosd? */
-/*
- * Perform a synchronous OSD operation.  copied from exofs; move to libosd?
- */
-static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
-{
-	int ret;
-
-	or->timeout = timeout;
-	ret = osd_finalize_request(or, 0, credential, NULL);
-	if (ret)
-		return ret;
-
-	ret = osd_execute_request(or);
-
-	/* osd_req_decode_sense(or, ret); */
-	return ret;
-}
-
-/*
- * Perform an asynchronous OSD operation.  copied from exofs; move to libosd?
- */
-static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
-		   void *caller_context, u8 *cred)
-{
-	int ret;
-
-	ret = osd_finalize_request(or, 0, cred, NULL);
-	if (ret)
-		return ret;
-
-	ret = osd_execute_request_async(or, async_done, caller_context);
-
-	return ret;
-}
-
-/* copied from exofs; move to libosd? */
-static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
-{
-	struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
-	void *iter = NULL;
-	int nelem;
-
-	do {
-		nelem = 1;
-		osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
-		if ((cur_attr.attr_page == attr->attr_page) &&
-		    (cur_attr.attr_id == attr->attr_id)) {
-			attr->len = cur_attr.len;
-			attr->val_ptr = cur_attr.val_ptr;
-			return 0;
-		}
-	} while (iter);
-
-	return -EIO;
-}
-
-static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
-{
-	struct osd_request *or;
-	struct osd_attr attr;
-	int ret;
-
-	/* start request */
-	or = osd_start_request(osdev->osd, GFP_KERNEL);
-	if (!or)
-		return -ENOMEM;
-
-	/* create a get-attributes(length) request */
-	osd_req_get_attributes(or, &osdev->obj);
-
-	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
-
-	/* execute op synchronously */
-	ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
-	if (ret)
-		goto out;
-
-	/* extract length from returned attribute info */
-	attr = g_attr_logical_length;
-	ret = extract_attr_from_req(or, &attr);
-	if (ret)
-		goto out;
-
-	*size_out = get_unaligned_be64(attr.val_ptr);
-
-out:
-	osd_end_request(or);
-	return ret;
-
-}
-
-static void osdblk_osd_complete(struct osd_request *or, void *private)
-{
-	struct osdblk_request *orq = private;
-	struct osd_sense_info osi;
-	int ret = osd_req_decode_sense(or, &osi);
-
-	if (ret) {
-		ret = -EIO;
-		OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
-	}
-
-	/* complete OSD request */
-	osd_end_request(or);
-
-	/* complete request passed to osdblk by block layer */
-	__blk_end_request_all(orq->rq, ret);
-}
-
-static void bio_chain_put(struct bio *chain)
-{
-	struct bio *tmp;
-
-	while (chain) {
-		tmp = chain;
-		chain = chain->bi_next;
-
-		bio_put(tmp);
-	}
-}
-
-static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
-{
-	struct bio *tmp, *new_chain = NULL, *tail = NULL;
-
-	while (old_chain) {
-		tmp = bio_clone_kmalloc(old_chain, gfpmask);
-		if (!tmp)
-			goto err_out;
-
-		tmp->bi_bdev = NULL;
-		gfpmask &= ~__GFP_DIRECT_RECLAIM;
-		tmp->bi_next = NULL;
-
-		if (!new_chain)
-			new_chain = tail = tmp;
-		else {
-			tail->bi_next = tmp;
-			tail = tmp;
-		}
-
-		old_chain = old_chain->bi_next;
-	}
-
-	return new_chain;
-
-err_out:
-	OSDBLK_DEBUG("bio_chain_clone with err\n");
-	bio_chain_put(new_chain);
-	return NULL;
-}
-
-static void osdblk_rq_fn(struct request_queue *q)
-{
-	struct osdblk_device *osdev = q->queuedata;
-
-	while (1) {
-		struct request *rq;
-		struct osdblk_request *orq;
-		struct osd_request *or;
-		struct bio *bio;
-		bool do_write, do_flush;
-
-		/* peek at request from block layer */
-		rq = blk_fetch_request(q);
-		if (!rq)
-			break;
-
-		/* deduce our operation (read, write, flush) */
-		/* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
-		 * into a clearly defined set of RPC commands:
-		 * read, write, flush, scsi command, power mgmt req,
-		 * driver-specific, etc.
-		 */
-
-		do_flush = (req_op(rq) == REQ_OP_FLUSH);
-		do_write = (rq_data_dir(rq) == WRITE);
-
-		if (!do_flush) { /* osd_flush does not use a bio */
-			/* a bio clone to be passed down to OSD request */
-			bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
-			if (!bio)
-				break;
-		} else
-			bio = NULL;
-
-		/* alloc internal OSD request, for OSD command execution */
-		or = osd_start_request(osdev->osd, GFP_ATOMIC);
-		if (!or) {
-			bio_chain_put(bio);
-			OSDBLK_DEBUG("osd_start_request with err\n");
-			break;
-		}
-
-		orq = &osdev->req[rq->tag];
-		orq->rq = rq;
-		orq->bio = bio;
-		orq->osdev = osdev;
-
-		/* init OSD command: flush, write or read */
-		if (do_flush)
-			osd_req_flush_object(or, &osdev->obj,
-					     OSD_CDB_FLUSH_ALL, 0, 0);
-		else if (do_write)
-			osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
-				      bio, blk_rq_bytes(rq));
-		else
-			osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
-				     bio, blk_rq_bytes(rq));
-
-		OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
-			do_flush ? "flush" : do_write ?
-				"write" : "read", blk_rq_bytes(rq),
-			blk_rq_pos(rq) * 512ULL);
-
-		/* begin OSD command execution */
-		if (osd_async_op(or, osdblk_osd_complete, orq,
-				 osdev->obj_cred)) {
-			osd_end_request(or);
-			blk_requeue_request(q, rq);
-			bio_chain_put(bio);
-			OSDBLK_DEBUG("osd_execute_request_async with err\n");
-			break;
-		}
-
-		/* remove the special 'flush' marker, now that the command
-		 * is executing
-		 */
-		rq->special = NULL;
-	}
-}
-
-static void osdblk_free_disk(struct osdblk_device *osdev)
-{
-	struct gendisk *disk = osdev->disk;
-
-	if (!disk)
-		return;
-
-	if (disk->flags & GENHD_FL_UP)
-		del_gendisk(disk);
-	if (disk->queue)
-		blk_cleanup_queue(disk->queue);
-	put_disk(disk);
-}
-
-static int osdblk_init_disk(struct osdblk_device *osdev)
-{
-	struct gendisk *disk;
-	struct request_queue *q;
-	int rc;
-	u64 obj_size = 0;
-
-	/* contact OSD, request size info about the object being mapped */
-	rc = osdblk_get_obj_size(osdev, &obj_size);
-	if (rc)
-		return rc;
-
-	/* create gendisk info */
-	disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
-	if (!disk)
-		return -ENOMEM;
-
-	sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
-	disk->major = osdev->major;
-	disk->first_minor = 0;
-	disk->fops = &osdblk_bd_ops;
-	disk->private_data = osdev;
-
-	/* init rq */
-	q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
-	if (!q) {
-		put_disk(disk);
-		return -ENOMEM;
-	}
-
-	/* switch queue to TCQ mode; allocate tag map */
-	rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO);
-	if (rc) {
-		blk_cleanup_queue(q);
-		put_disk(disk);
-		return rc;
-	}
-
-	/* Set our limits to the lower device limits, because osdblk cannot
-	 * sleep when allocating a lower-request and therefore cannot be
-	 * bouncing.
-	 */
-	blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
-
-	blk_queue_prep_rq(q, blk_queue_start_tag);
-	blk_queue_write_cache(q, true, false);
-
-	disk->queue = q;
-
-	q->queuedata = osdev;
-
-	osdev->disk = disk;
-	osdev->q = q;
-
-	/* finally, announce the disk to the world */
-	set_capacity(disk, obj_size / 512ULL);
-	add_disk(disk);
-
-	printk(KERN_INFO "%s: Added of size 0x%llx\n",
-		disk->disk_name, (unsigned long long)obj_size);
-
-	return 0;
-}
-
-/********************************************************************
- * /sys/class/osdblk/
- *                   add	map OSD object to blkdev
- *                   remove	unmap OSD object
- *                   list	show mappings
- *******************************************************************/
-
-static void class_osdblk_release(struct class *cls)
-{
-	kfree(cls);
-}
-
-static ssize_t class_osdblk_list(struct class *c,
-				struct class_attribute *attr,
-				char *data)
-{
-	int n = 0;
-	struct list_head *tmp;
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	list_for_each(tmp, &osdblkdev_list) {
-		struct osdblk_device *osdev;
-
-		osdev = list_entry(tmp, struct osdblk_device, node);
-
-		n += sprintf(data+n, "%d %d %llu %llu %s\n",
-			osdev->id,
-			osdev->major,
-			osdev->obj.partition,
-			osdev->obj.id,
-			osdev->osd_path);
-	}
-
-	mutex_unlock(&ctl_mutex);
-	return n;
-}
-
-static ssize_t class_osdblk_add(struct class *c,
-				struct class_attribute *attr,
-				const char *buf, size_t count)
-{
-	struct osdblk_device *osdev;
-	ssize_t rc;
-	int irc, new_id = 0;
-	struct list_head *tmp;
-
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
-	/* new osdblk_device object */
-	osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
-	if (!osdev) {
-		rc = -ENOMEM;
-		goto err_out_mod;
-	}
-
-	/* static osdblk_device initialization */
-	spin_lock_init(&osdev->lock);
-	INIT_LIST_HEAD(&osdev->node);
-
-	/* generate unique id: find highest unique id, add one */
-
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	list_for_each(tmp, &osdblkdev_list) {
-		struct osdblk_device *osdev;
-
-		osdev = list_entry(tmp, struct osdblk_device, node);
-		if (osdev->id > new_id)
-			new_id = osdev->id + 1;
-	}
-
-	osdev->id = new_id;
-
-	/* add to global list */
-	list_add_tail(&osdev->node, &osdblkdev_list);
-
-	mutex_unlock(&ctl_mutex);
-
-	/* parse add command */
-	if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
-		   osdev->osd_path) != 3) {
-		rc = -EINVAL;
-		goto err_out_slot;
-	}
-
-	/* initialize rest of new object */
-	sprintf(osdev->name, DRV_NAME "%d", osdev->id);
-
-	/* contact requested OSD */
-	osdev->osd = osduld_path_lookup(osdev->osd_path);
-	if (IS_ERR(osdev->osd)) {
-		rc = PTR_ERR(osdev->osd);
-		goto err_out_slot;
-	}
-
-	/* build OSD credential */
-	osdblk_make_credential(osdev->obj_cred, &osdev->obj);
-
-	/* register our block device */
-	irc = register_blkdev(0, osdev->name);
-	if (irc < 0) {
-		rc = irc;
-		goto err_out_osd;
-	}
-
-	osdev->major = irc;
-
-	/* set up and announce blkdev mapping */
-	rc = osdblk_init_disk(osdev);
-	if (rc)
-		goto err_out_blkdev;
-
-	return count;
-
-err_out_blkdev:
-	unregister_blkdev(osdev->major, osdev->name);
-err_out_osd:
-	osduld_put_device(osdev->osd);
-err_out_slot:
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-	list_del_init(&osdev->node);
-	mutex_unlock(&ctl_mutex);
-
-	kfree(osdev);
-err_out_mod:
-	OSDBLK_DEBUG("Error adding device %s\n", buf);
-	module_put(THIS_MODULE);
-	return rc;
-}
-
-static ssize_t class_osdblk_remove(struct class *c,
-					struct class_attribute *attr,
-					const char *buf,
-					size_t count)
-{
-	struct osdblk_device *osdev = NULL;
-	int target_id, rc;
-	unsigned long ul;
-	struct list_head *tmp;
-
-	rc = kstrtoul(buf, 10, &ul);
-	if (rc)
-		return rc;
-
-	/* convert to int; abort if we lost anything in the conversion */
-	target_id = (int) ul;
-	if (target_id != ul)
-		return -EINVAL;
-
-	/* remove object from list immediately */
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-
-	list_for_each(tmp, &osdblkdev_list) {
-		osdev = list_entry(tmp, struct osdblk_device, node);
-		if (osdev->id == target_id) {
-			list_del_init(&osdev->node);
-			break;
-		}
-		osdev = NULL;
-	}
-
-	mutex_unlock(&ctl_mutex);
-
-	if (!osdev)
-		return -ENOENT;
-
-	/* clean up and free blkdev and associated OSD connection */
-	osdblk_free_disk(osdev);
-	unregister_blkdev(osdev->major, osdev->name);
-	osduld_put_device(osdev->osd);
-	kfree(osdev);
-
-	/* release module ref */
-	module_put(THIS_MODULE);
-
-	return count;
-}
-
-static struct class_attribute class_osdblk_attrs[] = {
-	__ATTR(add,	0200, NULL, class_osdblk_add),
-	__ATTR(remove,	0200, NULL, class_osdblk_remove),
-	__ATTR(list,	0444, class_osdblk_list, NULL),
-	__ATTR_NULL
-};
-
-static int osdblk_sysfs_init(void)
-{
-	int ret = 0;
-
-	/*
-	 * create control files in sysfs
-	 * /sys/class/osdblk/...
-	 */
-	class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
-	if (!class_osdblk)
-		return -ENOMEM;
-
-	class_osdblk->name = DRV_NAME;
-	class_osdblk->owner = THIS_MODULE;
-	class_osdblk->class_release = class_osdblk_release;
-	class_osdblk->class_attrs = class_osdblk_attrs;
-
-	ret = class_register(class_osdblk);
-	if (ret) {
-		kfree(class_osdblk);
-		class_osdblk = NULL;
-		printk(PFX "failed to create class osdblk\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-static void osdblk_sysfs_cleanup(void)
-{
-	if (class_osdblk)
-		class_destroy(class_osdblk);
-	class_osdblk = NULL;
-}
-
-static int __init osdblk_init(void)
-{
-	int rc;
-
-	rc = osdblk_sysfs_init();
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
-static void __exit osdblk_exit(void)
-{
-	osdblk_sysfs_cleanup();
-}
-
-module_init(osdblk_init);
-module_exit(osdblk_exit);
-
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 939641d6e262..b1267ef34d5a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -300,6 +300,11 @@ static void pcd_init_units(void)
 		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
+		disk->queue = blk_init_queue(do_pcd_request, &pcd_lock);
+		if (!disk->queue) {
+			put_disk(disk);
+			continue;
+		}
 		cd->disk = disk;
 		cd->pi = &cd->pia;
 		cd->present = 0;
@@ -735,18 +740,36 @@ static int pcd_detect(void)
 }
 
 /* I/O request processing */
-static struct request_queue *pcd_queue;
+static int pcd_queue;
+
+static int set_next_request(void)
+{
+	struct pcd_unit *cd;
+	struct request_queue *q;
+	int old_pos = pcd_queue;
+
+	do {
+		cd = &pcd[pcd_queue];
+		q = cd->present ? cd->disk->queue : NULL;
+		if (++pcd_queue == PCD_UNITS)
+			pcd_queue = 0;
+		if (q) {
+			pcd_req = blk_fetch_request(q);
+			if (pcd_req)
+				break;
+		}
+	} while (pcd_queue != old_pos);
+
+	return pcd_req != NULL;
+}
 
-static void do_pcd_request(struct request_queue * q)
+static void pcd_request(void)
 {
 	if (pcd_busy)
 		return;
 	while (1) {
-		if (!pcd_req) {
-			pcd_req = blk_fetch_request(q);
-			if (!pcd_req)
-				return;
-		}
+		if (!pcd_req && !set_next_request())
+			return;
 
 		if (rq_data_dir(pcd_req) == READ) {
 			struct pcd_unit *cd = pcd_req->rq_disk->private_data;
@@ -766,6 +789,11 @@ static void do_pcd_request(struct request_queue * q)
 	}
 }
 
+static void do_pcd_request(struct request_queue *q)
+{
+	pcd_request();
+}
+
 static inline void next_request(int err)
 {
 	unsigned long saved_flags;
@@ -774,7 +802,7 @@ static inline void next_request(int err)
 	if (!__blk_end_request_cur(pcd_req, err))
 		pcd_req = NULL;
 	pcd_busy = 0;
-	do_pcd_request(pcd_queue);
+	pcd_request();
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
 }
 
@@ -849,7 +877,7 @@ static void do_pcd_read_drq(void)
 
 	do_pcd_read();
 	spin_lock_irqsave(&pcd_lock, saved_flags);
-	do_pcd_request(pcd_queue);
+	pcd_request();
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
 }
 
@@ -957,19 +985,10 @@ static int __init pcd_init(void)
 		return -EBUSY;
 	}
 
-	pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock);
-	if (!pcd_queue) {
-		unregister_blkdev(major, name);
-		for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
-			put_disk(cd->disk);
-		return -ENOMEM;
-	}
-
 	for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
 		if (cd->present) {
 			register_cdrom(&cd->info);
 			cd->disk->private_data = cd;
-			cd->disk->queue = pcd_queue;
 			add_disk(cd->disk);
 		}
 	}
@@ -988,9 +1007,9 @@ static void __exit pcd_exit(void)
 			pi_release(cd->pi);
 			unregister_cdrom(&cd->info);
 		}
+		blk_cleanup_queue(cd->disk->queue);
 		put_disk(cd->disk);
 	}
-	blk_cleanup_queue(pcd_queue);
 	unregister_blkdev(major, name);
 	pi_unregister_driver(par_drv);
 }
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9cfd2e06a649..7d2402f90978 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -381,12 +381,33 @@ static enum action do_pd_write_start(void);
 static enum action do_pd_read_drq(void);
 static enum action do_pd_write_done(void);
 
-static struct request_queue *pd_queue;
+static int pd_queue;
 static int pd_claimed;
 
 static struct pd_unit *pd_current; /* current request's drive */
 static PIA *pi_current; /* current request's PIA */
 
+static int set_next_request(void)
+{
+	struct gendisk *disk;
+	struct request_queue *q;
+	int old_pos = pd_queue;
+
+	do {
+		disk = pd[pd_queue].gd;
+		q = disk ? disk->queue : NULL;
+		if (++pd_queue == PD_UNITS)
+			pd_queue = 0;
+		if (q) {
+			pd_req = blk_fetch_request(q);
+			if (pd_req)
+				break;
+		}
+	} while (pd_queue != old_pos);
+
+	return pd_req != NULL;
+}
+
 static void run_fsm(void)
 {
 	while (1) {
@@ -418,8 +439,7 @@ static void run_fsm(void)
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
 						res == Ok ? 0 : -EIO)) {
-					pd_req = blk_fetch_request(pd_queue);
-					if (!pd_req)
+					if (!set_next_request())
 						stop = 1;
 				}
 				spin_unlock_irqrestore(&pd_lock, saved_flags);
@@ -719,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk,
 		      enum action (*func)(struct pd_unit *disk))
 {
 	struct request *rq;
-	int err = 0;
 
 	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
 	rq->special = func;
-
-	err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
-
+	blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
 	blk_put_request(rq);
-	return err;
+	return 0;
 }
 
 /* kernel glue structures */
@@ -839,7 +856,13 @@ static void pd_probe_drive(struct pd_unit *disk)
 	p->first_minor = (disk - pd) << PD_BITS;
 	disk->gd = p;
 	p->private_data = disk;
-	p->queue = pd_queue;
+	p->queue = blk_init_queue(do_pd_request, &pd_lock);
+	if (!p->queue) {
+		disk->gd = NULL;
+		put_disk(p);
+		return;
+	}
+	blk_queue_max_hw_sectors(p->queue, cluster);
 
 	if (disk->drive == -1) {
 		for (disk->drive = 0; disk->drive <= 1; disk->drive++)
@@ -919,26 +942,18 @@ static int __init pd_init(void)
 	if (disable)
 		goto out1;
 
-	pd_queue = blk_init_queue(do_pd_request, &pd_lock);
-	if (!pd_queue)
-		goto out1;
-
-	blk_queue_max_hw_sectors(pd_queue, cluster);
-
 	if (register_blkdev(major, name))
-		goto out2;
+		goto out1;
 
 	printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
 	       name, name, PD_VERSION, major, cluster, nice);
 	if (!pd_detect())
-		goto out3;
+		goto out2;
 
 	return 0;
 
-out3:
-	unregister_blkdev(major, name);
 out2:
-	blk_cleanup_queue(pd_queue);
+	unregister_blkdev(major, name);
 out1:
 	return -ENODEV;
 }
@@ -953,11 +968,11 @@ static void __exit pd_exit(void)
 		if (p) {
 			disk->gd = NULL;
 			del_gendisk(p);
+			blk_cleanup_queue(p->queue);
 			put_disk(p);
 			pi_release(disk->pi);
 		}
 	}
-	blk_cleanup_queue(pd_queue);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 14c5d32f5d8b..f24ca7315ddc 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -287,6 +287,12 @@ static void __init pf_init_units(void)
 		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			continue;
+		disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock);
+		if (!disk->queue) {
+			put_disk(disk);
+			return;
+		}
+		blk_queue_max_segments(disk->queue, cluster);
 		pf->disk = disk;
 		pf->pi = &pf->pia;
 		pf->media_status = PF_NM;
@@ -772,7 +778,28 @@ static int pf_ready(void)
 	return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask));
 }
 
-static struct request_queue *pf_queue;
+static int pf_queue;
+
+static int set_next_request(void)
+{
+	struct pf_unit *pf;
+	struct request_queue *q;
+	int old_pos = pf_queue;
+
+	do {
+		pf = &units[pf_queue];
+		q = pf->present ? pf->disk->queue : NULL;
+		if (++pf_queue == PF_UNITS)
+			pf_queue = 0;
+		if (q) {
+			pf_req = blk_fetch_request(q);
+			if (pf_req)
+				break;
+		}
+	} while (pf_queue != old_pos);
+
+	return pf_req != NULL;
+}
 
 static void pf_end_request(int err)
 {
@@ -780,16 +807,13 @@ static void pf_end_request(int err)
 		pf_req = NULL;
 }
 
-static void do_pf_request(struct request_queue * q)
+static void pf_request(void)
 {
 	if (pf_busy)
 		return;
 repeat:
-	if (!pf_req) {
-		pf_req = blk_fetch_request(q);
-		if (!pf_req)
-			return;
-	}
+	if (!pf_req && !set_next_request())
+		return;
 
 	pf_current = pf_req->rq_disk->private_data;
 	pf_block = blk_rq_pos(pf_req);
@@ -817,6 +841,11 @@ repeat:
 	}
 }
 
+static void do_pf_request(struct request_queue *q)
+{
+	pf_request();
+}
+
 static int pf_next_buf(void)
 {
 	unsigned long saved_flags;
@@ -846,7 +875,7 @@ static inline void next_request(int err)
 	spin_lock_irqsave(&pf_spin_lock, saved_flags);
 	pf_end_request(err);
 	pf_busy = 0;
-	do_pf_request(pf_queue);
+	pf_request();
 	spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 }
 
@@ -972,15 +1001,6 @@ static int __init pf_init(void)
 			put_disk(pf->disk);
 		return -EBUSY;
 	}
-	pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock);
-	if (!pf_queue) {
-		unregister_blkdev(major, name);
-		for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
-			put_disk(pf->disk);
-		return -ENOMEM;
-	}
-
-	blk_queue_max_segments(pf_queue, cluster);
 
 	for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
 		struct gendisk *disk = pf->disk;
@@ -988,7 +1008,6 @@ static int __init pf_init(void)
 		if (!pf->present)
 			continue;
 		disk->private_data = pf;
-		disk->queue = pf_queue;
 		add_disk(disk);
 	}
 	return 0;
@@ -1003,10 +1022,10 @@ static void __exit pf_exit(void)
 		if (!pf->present)
 			continue;
 		del_gendisk(pf->disk);
+		blk_cleanup_queue(pf->disk->queue);
 		put_disk(pf->disk);
 		pi_release(pf->pi);
 	}
-	blk_cleanup_queue(pf_queue);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 66d846ba85a9..205b865ebeb9 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 		rq->rq_flags |= RQF_QUIET;
 
 	blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
-	if (rq->errors)
+	if (scsi_req(rq)->result)
 		ret = -EIO;
 out:
 	blk_put_request(rq);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 517838b65964..089ac4179919 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4317,7 +4317,7 @@ static int rbd_init_request(void *data, struct request *rq,
 	return 0;
 }
 
-static struct blk_mq_ops rbd_mq_ops = {
+static const struct blk_mq_ops rbd_mq_ops = {
 	.queue_rq	= rbd_queue_rq,
 	.init_request	= rbd_init_request,
 };
@@ -4380,7 +4380,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	q->limits.discard_granularity = segment_size;
 	q->limits.discard_alignment = segment_size;
 	blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
-	q->limits.discard_zeroes_data = 1;
 
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index f81d70b39d10..9c566364ac9c 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -300,7 +300,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 						RSXX_HW_BLK_SIZE >> 9);
 		card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
 		card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
-		card->queue->limits.discard_zeroes_data = 1;
 	}
 
 	card->queue->queuedata = card;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index b5afd495d482..3064be6cf375 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -211,7 +211,7 @@ enum head {
 struct swim_priv {
 	struct swim __iomem *base;
 	spinlock_t lock;
-	struct request_queue *queue;
+	int fdc_queue;
 	int floppy_count;
 	struct floppy_state unit[FD_MAX_UNIT];
 };
@@ -525,12 +525,33 @@ static int floppy_read_sectors(struct floppy_state *fs,
 	return 0;
 }
 
-static void redo_fd_request(struct request_queue *q)
+static struct request *swim_next_request(struct swim_priv *swd)
 {
+	struct request_queue *q;
+	struct request *rq;
+	int old_pos = swd->fdc_queue;
+
+	do {
+		q = swd->unit[swd->fdc_queue].disk->queue;
+		if (++swd->fdc_queue == swd->floppy_count)
+			swd->fdc_queue = 0;
+		if (q) {
+			rq = blk_fetch_request(q);
+			if (rq)
+				return rq;
+		}
+	} while (swd->fdc_queue != old_pos);
+
+	return NULL;
+}
+
+static void do_fd_request(struct request_queue *q)
+{
+	struct swim_priv *swd = q->queuedata;
 	struct request *req;
 	struct floppy_state *fs;
 
-	req = blk_fetch_request(q);
+	req = swim_next_request(swd);
 	while (req) {
 		int err = -EIO;
 
@@ -554,15 +575,10 @@ static void redo_fd_request(struct request_queue *q)
 		}
 	done:
 		if (!__blk_end_request_cur(req, err))
-			req = blk_fetch_request(q);
+			req = swim_next_request(swd);
 	}
 }
 
-static void do_fd_request(struct request_queue *q)
-{
-	redo_fd_request(q);
-}
-
 static struct floppy_struct floppy_type[4] = {
 	{    0,  0, 0,  0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing   */
 	{  720,  9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/
@@ -833,22 +849,25 @@ static int swim_floppy_init(struct swim_priv *swd)
 		return -EBUSY;
 	}
 
+	spin_lock_init(&swd->lock);
+
 	for (drive = 0; drive < swd->floppy_count; drive++) {
 		swd->unit[drive].disk = alloc_disk(1);
 		if (swd->unit[drive].disk == NULL) {
 			err = -ENOMEM;
 			goto exit_put_disks;
 		}
+		swd->unit[drive].disk->queue = blk_init_queue(do_fd_request,
+							      &swd->lock);
+		if (!swd->unit[drive].disk->queue) {
+			err = -ENOMEM;
+			put_disk(swd->unit[drive].disk);
+			goto exit_put_disks;
+		}
+		swd->unit[drive].disk->queue->queuedata = swd;
 		swd->unit[drive].swd = swd;
 	}
 
-	spin_lock_init(&swd->lock);
-	swd->queue = blk_init_queue(do_fd_request, &swd->lock);
-	if (!swd->queue) {
-		err = -ENOMEM;
-		goto exit_put_disks;
-	}
-
 	for (drive = 0; drive < swd->floppy_count; drive++) {
 		swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
 		swd->unit[drive].disk->major = FLOPPY_MAJOR;
@@ -856,7 +875,6 @@ static int swim_floppy_init(struct swim_priv *swd)
 		sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
 		swd->unit[drive].disk->fops = &floppy_fops;
 		swd->unit[drive].disk->private_data = &swd->unit[drive];
-		swd->unit[drive].disk->queue = swd->queue;
 		set_capacity(swd->unit[drive].disk, 2880);
 		add_disk(swd->unit[drive].disk);
 	}
@@ -943,13 +961,12 @@ static int swim_remove(struct platform_device *dev)
 
 	for (drive = 0; drive < swd->floppy_count; drive++) {
 		del_gendisk(swd->unit[drive].disk);
+		blk_cleanup_queue(swd->unit[drive].disk->queue);
 		put_disk(swd->unit[drive].disk);
 	}
 
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 
-	blk_cleanup_queue(swd->queue);
-
 	/* eject floppies */
 
 	for (drive = 0; drive < swd->floppy_count; drive++)
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 61b3ffa4f458..ba4809c9bdba 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs)
 			  req->rq_disk->disk_name, req->cmd,
 			  (long)blk_rq_pos(req), blk_rq_sectors(req),
 			  bio_data(req->bio));
-		swim3_dbg("           errors=%d current_nr_sectors=%u\n",
-			  req->errors, blk_rq_cur_sectors(req));
+		swim3_dbg("           current_nr_sectors=%u\n",
+			  blk_rq_cur_sectors(req));
 #endif
 
 		if (blk_rq_pos(req) >= fs->total_secs) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8bc1e1..f94614257462 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	struct virtio_blk *vblk = req->q->queuedata;
@@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req)
 
 	sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
 	sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
-	req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
+	sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
 }
 
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
@@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq,
 {
 	return -EIO;
 }
-static inline void virtblk_scsi_reques_done(struct request *req)
+static inline void virtblk_scsi_request_done(struct request *req)
 {
 }
 #define virtblk_ioctl	NULL
@@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
-	int error = virtblk_result(vbr);
 
 	switch (req_op(req)) {
 	case REQ_OP_SCSI_IN:
 	case REQ_OP_SCSI_OUT:
-		virtblk_scsi_reques_done(req);
-		break;
-	case REQ_OP_DRV_IN:
-		req->errors = (error != 0);
+		virtblk_scsi_request_done(req);
 		break;
 	}
 
-	blk_mq_end_request(req, error);
+	blk_mq_end_request(req, virtblk_result(vbr));
 }
 
 static void virtblk_done(struct virtqueue *vq)
@@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq)
 		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
 			struct request *req = blk_mq_rq_from_pdu(vbr);
 
-			blk_mq_complete_request(req, req->errors);
+			blk_mq_complete_request(req);
 			req_done = true;
 		}
 		if (unlikely(virtqueue_is_broken(vq)))
@@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	if (err)
 		goto out;
 
-	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+	err = virtblk_result(blk_mq_rq_to_pdu(req));
 out:
 	blk_put_request(req);
 	return err;
@@ -597,7 +594,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
 	return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
 }
 
-static struct blk_mq_ops virtio_mq_ops = {
+static const struct blk_mq_ops virtio_mq_ops = {
 	.queue_rq	= virtio_queue_rq,
 	.complete	= virtblk_request_done,
 	.init_request	= virtblk_init_request,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5067a0a952cb..39459631667c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -115,6 +115,15 @@ struct split_bio {
 	atomic_t pending;
 };
 
+struct blkif_req {
+	int	error;
+};
+
+static inline struct blkif_req *blkif_req(struct request *rq)
+{
+	return blk_mq_rq_to_pdu(rq);
+}
+
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
@@ -907,8 +916,14 @@ out_busy:
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static struct blk_mq_ops blkfront_mq_ops = {
+static void blkif_complete_rq(struct request *rq)
+{
+	blk_mq_end_request(rq, blkif_req(rq)->error);
+}
+
+static const struct blk_mq_ops blkfront_mq_ops = {
 	.queue_rq = blkif_queue_rq,
+	.complete = blkif_complete_rq,
 };
 
 static void blkif_set_queue_limits(struct blkfront_info *info)
@@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 		info->tag_set.queue_depth = BLK_RING_SIZE(info);
 	info->tag_set.numa_node = NUMA_NO_NODE;
 	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
-	info->tag_set.cmd_size = 0;
+	info->tag_set.cmd_size = sizeof(struct blkif_req);
 	info->tag_set.driver_data = info;
 
 	if (blk_mq_alloc_tag_set(&info->tag_set))
@@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	unsigned long flags;
 	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
 	struct blkfront_info *info = rinfo->dev_info;
-	int error;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return IRQ_HANDLED;
@@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				blkif_req(req)->error = -EOPNOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
 				queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
 			}
-			blk_mq_complete_request(req, error);
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				blkif_req(req)->error = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				blkif_req(req)->error = -EOPNOTSUPP;
 			}
-			if (unlikely(error)) {
-				if (error == -EOPNOTSUPP)
-					error = 0;
+			if (unlikely(blkif_req(req)->error)) {
+				if (blkif_req(req)->error == -EOPNOTSUPP)
+					blkif_req(req)->error = 0;
 				info->feature_fua = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
@@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			blk_mq_complete_request(req, error);
 			break;
 		default:
 			BUG();
 		}
+
+		blk_mq_complete_request(req);
 	}
 
 	rinfo->ring.rsp_cons = i;
@@ -2345,6 +2359,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int physical_sector_size;
 	unsigned int binfo;
+	char *envp[] = { "RESIZE=1", NULL };
 	int err, i;
 
 	switch (info->connected) {
@@ -2361,6 +2376,8 @@ static void blkfront_connect(struct blkfront_info *info)
 		       sectors);
 		set_capacity(info->gd, sectors);
 		revalidate_disk(info->gd);
+		kobject_uevent_env(&disk_to_dev(info->gd)->kobj,
+				   KOBJ_CHANGE, envp);
 
 		return;
 	case BLKIF_STATE_SUSPENDED:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0c09d4256108..6fac5fedd610 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -829,10 +829,14 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 	offset = (bio->bi_iter.bi_sector &
 		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
-	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		zram_bio_discard(zram, index, offset, bio);
 		bio_endio(bio);
 		return;
+	default:
+		break;
 	}
 
 	bio_for_each_segment(bvec, bio, iter) {
@@ -1192,6 +1196,8 @@ static int zram_add(void)
 	zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
 	zram->disk->queue->limits.chunk_sectors = 0;
 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+
 	/*
 	 * zram_bio_discard() will clear all logical blocks if logical block
 	 * size is identical with physical block size(PAGE_SIZE). But if it is
@@ -1201,10 +1207,7 @@ static int zram_add(void)
 	 * zeroed.
 	 */
 	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
-		zram->disk->queue->limits.discard_zeroes_data = 1;
-	else
-		zram->disk->queue->limits.discard_zeroes_data = 0;
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
 
 	add_disk(zram->disk);
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 87739649eac2..76c952fd9ab9 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2218,7 +2218,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
-		if (blk_execute_rq(q, cdi->disk, rq, 0)) {
+		blk_execute_rq(q, cdi->disk, rq, 0);
+		if (scsi_req(rq)->result) {
 			struct request_sense *s = req->sense;
 			ret = -EIO;
 			cdi->last_sense = s->sense_key;
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
index d6f5d9eb102d..70d434bc1cbf 100644
--- a/drivers/char/ipmi/bt-bmc.c
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -523,6 +523,7 @@ static int bt_bmc_remove(struct platform_device *pdev)
 
 static const struct of_device_id bt_bmc_match[] = {
 	{ .compatible = "aspeed,ast2400-ibt-bmc" },
+	{ .compatible = "aspeed,ast2500-ibt-bmc" },
 	{ },
 };
 
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 2a7c425ddfa7..b2b618f066e0 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1954,7 +1954,9 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
 				kfree(info);
 				goto out;
 			}
+			mutex_lock(&smi_infos_lock);
 			rv = try_smi_init(info);
+			mutex_unlock(&smi_infos_lock);
 			if (rv) {
 				cleanup_one_si(info);
 				goto out;
@@ -2042,8 +2044,10 @@ static int hardcode_find_bmc(void)
 		info->slave_addr = slave_addrs[i];
 
 		if (!add_smi(info)) {
+			mutex_lock(&smi_infos_lock);
 			if (try_smi_init(info))
 				cleanup_one_si(info);
+			mutex_unlock(&smi_infos_lock);
 			ret = 0;
 		} else {
 			kfree(info);
@@ -3492,6 +3496,11 @@ out_err:
 	return rv;
 }
 
+/*
+ * Try to start up an interface.  Must be called with smi_infos_lock
+ * held, primarily to keep smi_num consistent, we only one to do these
+ * one at a time.
+ */
 static int try_smi_init(struct smi_info *new_smi)
 {
 	int rv = 0;
@@ -3524,9 +3533,12 @@ static int try_smi_init(struct smi_info *new_smi)
 		goto out_err;
 	}
 
+	new_smi->intf_num = smi_num;
+
 	/* Do this early so it's available for logs. */
 	if (!new_smi->dev) {
-		init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", 0);
+		init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d",
+				      new_smi->intf_num);
 
 		/*
 		 * If we don't already have a device from something
@@ -3593,8 +3605,6 @@ static int try_smi_init(struct smi_info *new_smi)
 
 	new_smi->interrupt_disabled = true;
 	atomic_set(&new_smi->need_watch, 0);
-	new_smi->intf_num = smi_num;
-	smi_num++;
 
 	rv = try_enable_event_buffer(new_smi);
 	if (rv == 0)
@@ -3661,6 +3671,9 @@ static int try_smi_init(struct smi_info *new_smi)
 		goto out_err_stop_timer;
 	}
 
+	/* Don't increment till we know we have succeeded. */
+	smi_num++;
+
 	dev_info(new_smi->dev, "IPMI %s interface initialized\n",
 		 si_to_str[new_smi->si_type]);
 
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index cca6e5bc1cea..0b22a9be5029 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -891,6 +891,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 		 * for details on the intricacies of this.
 		 */
 		int left;
+		unsigned char *data_to_send;
 
 		ssif_inc_stat(ssif_info, sent_messages_parts);
 
@@ -899,6 +900,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 			left = 32;
 		/* Length byte. */
 		ssif_info->multi_data[ssif_info->multi_pos] = left;
+		data_to_send = ssif_info->multi_data + ssif_info->multi_pos;
 		ssif_info->multi_pos += left;
 		if (left < 32)
 			/*
@@ -912,7 +914,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 		rv = ssif_i2c_send(ssif_info, msg_written_handler,
 				  I2C_SMBUS_WRITE,
 				  SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE,
-				  ssif_info->multi_data + ssif_info->multi_pos,
+				  data_to_send,
 				  I2C_SMBUS_BLOCK_DATA);
 		if (rv < 0) {
 			/* request failed, just return the error. */
@@ -1642,9 +1644,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	spin_lock_init(&ssif_info->lock);
 	ssif_info->ssif_state = SSIF_NORMAL;
-	init_timer(&ssif_info->retry_timer);
-	ssif_info->retry_timer.data = (unsigned long) ssif_info;
-	ssif_info->retry_timer.function = retry_timeout;
+	setup_timer(&ssif_info->retry_timer, retry_timeout,
+		    (unsigned long)ssif_info);
 
 	for (i = 0; i < SSIF_NUM_STATS; i++)
 		atomic_set(&ssif_info->stats[i], 0);
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 5ca24d9b101b..d165af8abe36 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -516,7 +516,7 @@ static void panic_halt_ipmi_heartbeat(void)
 	msg.cmd = IPMI_WDOG_RESET_TIMER;
 	msg.data = NULL;
 	msg.data_len = 0;
-	atomic_add(2, &panic_done_count);
+	atomic_add(1, &panic_done_count);
 	rv = ipmi_request_supply_msgs(watchdog_user,
 				      (struct ipmi_addr *) &addr,
 				      0,
@@ -526,7 +526,7 @@ static void panic_halt_ipmi_heartbeat(void)
 				      &panic_halt_heartbeat_recv_msg,
 				      1);
 	if (rv)
-		atomic_sub(2, &panic_done_count);
+		atomic_sub(1, &panic_done_count);
 }
 
 static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -550,12 +550,12 @@ static void panic_halt_ipmi_set_timeout(void)
 	/* Wait for the messages to be free. */
 	while (atomic_read(&panic_done_count) != 0)
 		ipmi_poll_interface(watchdog_user);
-	atomic_add(2, &panic_done_count);
+	atomic_add(1, &panic_done_count);
 	rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
 				&panic_halt_recv_msg,
 				&send_heartbeat_now);
 	if (rv) {
-		atomic_sub(2, &panic_done_count);
+		atomic_sub(1, &panic_done_count);
 		printk(KERN_WARNING PFX
 		       "Unable to extend the watchdog timeout.");
 	} else {
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index ab609a76706f..cf9449b3dbd9 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -429,6 +429,13 @@ static const struct clk_div_table pll_divp_table[] = {
 	{ 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
 };
 
+static const struct clk_div_table pll_divq_table[] = {
+	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 },
+	{ 8, 8 }, { 9, 9 }, { 10, 10 }, { 11, 11 }, { 12, 12 }, { 13, 13 },
+	{ 14, 14 }, { 15, 15 },
+	{ 0 }
+};
+
 static const struct clk_div_table pll_divr_table[] = {
 	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
 };
@@ -496,9 +503,9 @@ struct stm32f4_div_data {
 
 #define MAX_PLL_DIV 3
 static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
-	{ 16, 2, 0,			pll_divp_table	},
-	{ 24, 4, CLK_DIVIDER_ONE_BASED, NULL		},
-	{ 28, 3, 0,			pll_divr_table	},
+	{ 16, 2, 0, pll_divp_table },
+	{ 24, 4, 0, pll_divq_table },
+	{ 28, 3, 0, pll_divr_table },
 };
 
 struct stm32f4_pll_data {
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 72109d2cf41b..a077ab6edffa 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -1,6 +1,7 @@
 config SUNXI_CCU
 	bool "Clock support for Allwinner SoCs"
 	depends on ARCH_SUNXI || COMPILE_TEST
+	select RESET_CONTROLLER
 	default ARCH_SUNXI
 
 if SUNXI_CCU
@@ -15,7 +16,7 @@ config SUNXI_CCU_FRAC
 	bool
 
 config SUNXI_CCU_GATE
-	bool
+	def_bool y
 
 config SUNXI_CCU_MUX
 	bool
@@ -135,6 +136,7 @@ config SUN8I_V3S_CCU
 config SUN9I_A80_CCU
 	bool "Support for the Allwinner A80 CCU"
 	select SUNXI_CCU_DIV
+	select SUNXI_CCU_MULT
 	select SUNXI_CCU_GATE
 	select SUNXI_CCU_NKMP
 	select SUNXI_CCU_NM
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
index a7b3c08ed0e2..2c69b631967a 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -752,6 +752,13 @@ static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun8i_a33_ccu_resets),
 };
 
+static struct ccu_pll_nb sun8i_a33_pll_cpu_nb = {
+	.common	= &pll_cpux_clk.common,
+	/* copy from pll_cpux_clk */
+	.enable	= BIT(31),
+	.lock	= BIT(28),
+};
+
 static struct ccu_mux_nb sun8i_a33_cpu_nb = {
 	.common		= &cpux_clk.common,
 	.cm		= &cpux_clk.mux,
@@ -783,6 +790,10 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node)
 
 	sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
 
+	/* Gate then ungate PLL CPU after any rate changes */
+	ccu_pll_notifier_register(&sun8i_a33_pll_cpu_nb);
+
+	/* Reparent CPU during PLL CPU rate changes */
 	ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk,
 				  &sun8i_a33_cpu_nb);
 }
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index 8a47bafd7890..9d8724715a43 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -14,11 +14,13 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/iopoll.h>
 #include <linux/slab.h>
 
 #include "ccu_common.h"
+#include "ccu_gate.h"
 #include "ccu_reset.h"
 
 static DEFINE_SPINLOCK(ccu_lock);
@@ -39,6 +41,53 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
 	WARN_ON(readl_relaxed_poll_timeout(addr, reg, reg & lock, 100, 70000));
 }
 
+/*
+ * This clock notifier is called when the frequency of a PLL clock is
+ * changed. In common PLL designs, changes to the dividers take effect
+ * almost immediately, while changes to the multipliers (implemented
+ * as dividers in the feedback loop) take a few cycles to work into
+ * the feedback loop for the PLL to stablize.
+ *
+ * Sometimes when the PLL clock rate is changed, the decrease in the
+ * divider is too much for the decrease in the multiplier to catch up.
+ * The PLL clock rate will spike, and in some cases, might lock up
+ * completely.
+ *
+ * This notifier callback will gate and then ungate the clock,
+ * effectively resetting it, so it proceeds to work. Care must be
+ * taken to reparent consumers to other temporary clocks during the
+ * rate change, and that this notifier callback must be the first
+ * to be registered.
+ */
+static int ccu_pll_notifier_cb(struct notifier_block *nb,
+			       unsigned long event, void *data)
+{
+	struct ccu_pll_nb *pll = to_ccu_pll_nb(nb);
+	int ret = 0;
+
+	if (event != POST_RATE_CHANGE)
+		goto out;
+
+	ccu_gate_helper_disable(pll->common, pll->enable);
+
+	ret = ccu_gate_helper_enable(pll->common, pll->enable);
+	if (ret)
+		goto out;
+
+	ccu_helper_wait_for_lock(pll->common, pll->lock);
+
+out:
+	return notifier_from_errno(ret);
+}
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb)
+{
+	pll_nb->clk_nb.notifier_call = ccu_pll_notifier_cb;
+
+	return clk_notifier_register(pll_nb->common->hw.clk,
+				     &pll_nb->clk_nb);
+}
+
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 		    const struct sunxi_ccu_desc *desc)
 {
diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h
index 73d81dc58fc5..d6fdd7a789aa 100644
--- a/drivers/clk/sunxi-ng/ccu_common.h
+++ b/drivers/clk/sunxi-ng/ccu_common.h
@@ -83,6 +83,18 @@ struct sunxi_ccu_desc {
 
 void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock);
 
+struct ccu_pll_nb {
+	struct notifier_block	clk_nb;
+	struct ccu_common	*common;
+
+	u32	enable;
+	u32	lock;
+};
+
+#define to_ccu_pll_nb(_nb) container_of(_nb, struct ccu_pll_nb, clk_nb)
+
+int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb);
+
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
 		    const struct sunxi_ccu_desc *desc);
 
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74fa5c5904d3..74ed7e9a7f27 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -247,6 +247,12 @@ config ARM_TEGRA124_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
+config ARM_TEGRA186_CPUFREQ
+	tristate "Tegra186 CPUFreq support"
+	depends on ARCH_TEGRA && TEGRA_BPMP
+	help
+	  This adds the CPUFreq driver support for Tegra186 SOCs.
+
 config ARM_TI_CPUFREQ
 	bool "Texas Instruments CPUFreq support"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9f5a8045f36d..b7e78f063c4f 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
 obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 5c3ec1dd4921..3575b82210ba 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
@@ -18,6 +19,7 @@
 
 static struct cpufreq_frequency_table *freq_table;
 static struct clk *armss_clk;
+static struct thermal_cooling_device *cdev;
 
 static int dbx500_cpufreq_target(struct cpufreq_policy *policy,
 				unsigned int index)
@@ -32,6 +34,22 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy)
 	return cpufreq_generic_init(policy, freq_table, 20 * 1000);
 }
 
+static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	if (!IS_ERR(cdev))
+		cpufreq_cooling_unregister(cdev);
+	return 0;
+}
+
+static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
+{
+	cdev = cpufreq_cooling_register(policy->cpus);
+	if (IS_ERR(cdev))
+		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
+	else
+		pr_info("Cooling device registered: %s\n", cdev->type);
+}
+
 static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.flags  = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
 			CPUFREQ_NEED_INITIAL_FREQ_CHECK,
@@ -39,6 +57,8 @@ static struct cpufreq_driver dbx500_cpufreq_driver = {
 	.target_index = dbx500_cpufreq_target,
 	.get    = cpufreq_generic_get,
 	.init   = dbx500_cpufreq_init,
+	.exit  = dbx500_cpufreq_exit,
+	.ready  = dbx500_cpufreq_ready,
 	.name   = "DBX500",
 	.attr   = cpufreq_generic_attr,
 };
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 7719b02e04f5..9c13f097fd8c 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -161,8 +161,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 
 static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
 {
+	int ret;
+
 	policy->clk = arm_clk;
-	return cpufreq_generic_init(policy, freq_table, transition_latency);
+	ret = cpufreq_generic_init(policy, freq_table, transition_latency);
+	policy->suspend_freq = policy->max;
+
+	return ret;
 }
 
 static struct cpufreq_driver imx6q_cpufreq_driver = {
@@ -173,6 +178,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = {
 	.init = imx6q_cpufreq_init,
 	.name = "imx6q-cpufreq",
 	.attr = cpufreq_generic_attr,
+	.suspend = cpufreq_generic_suspend,
 };
 
 static int imx6q_cpufreq_probe(struct platform_device *pdev)
@@ -222,6 +228,13 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	arm_reg = regulator_get(cpu_dev, "arm");
 	pu_reg = regulator_get_optional(cpu_dev, "pu");
 	soc_reg = regulator_get(cpu_dev, "soc");
+	if (PTR_ERR(arm_reg) == -EPROBE_DEFER ||
+			PTR_ERR(soc_reg) == -EPROBE_DEFER ||
+			PTR_ERR(pu_reg) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		dev_dbg(cpu_dev, "regulators not ready, defer\n");
+		goto put_reg;
+	}
 	if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) {
 		dev_err(cpu_dev, "failed to get regulators\n");
 		ret = -ENOENT;
@@ -255,7 +268,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto put_reg;
+		goto out_free_opp;
 	}
 
 	/* Make imx6_soc_volt array's size same as arm opp number */
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 283491f742d3..b7de5bd76a31 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -37,7 +37,11 @@
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
 
+#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
+#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL	(50 * NSEC_PER_MSEC)
+
 #define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
+#define INTEL_CPUFREQ_TRANSITION_DELAY		500
 
 #ifdef CONFIG_ACPI
 #include <acpi/processor.h>
@@ -74,6 +78,11 @@ static inline int ceiling_fp(int32_t x)
 	return ret;
 }
 
+static inline int32_t percent_fp(int percent)
+{
+	return div_fp(percent, 100);
+}
+
 static inline u64 mul_ext_fp(u64 x, u64 y)
 {
 	return (x * y) >> EXT_FRAC_BITS;
@@ -186,45 +195,22 @@ struct _pid {
 };
 
 /**
- * struct perf_limits - Store user and policy limits
- * @no_turbo:		User requested turbo state from intel_pstate sysfs
- * @turbo_disabled:	Platform turbo status either from msr
- *			MSR_IA32_MISC_ENABLE or when maximum available pstate
- *			matches the maximum turbo pstate
- * @max_perf_pct:	Effective maximum performance limit in percentage, this
- *			is minimum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @min_perf_pct:	Effective minimum performance limit in percentage, this
- *			is maximum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
- *			This value is used to limit max pstate
- * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
- *			This value is used to limit min pstate
- * @max_policy_pct:	The maximum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @max_sysfs_pct:	The maximum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
- * @min_policy_pct:	The minimum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @min_sysfs_pct:	The minimum performance in percentage enforced by
- *			intel pstate sysfs interface, unused when per cpu
- *			controls are enforced
- *
- * Storage for user and policy defined limits.
+ * struct global_params - Global parameters, mostly tunable via sysfs.
+ * @no_turbo:		Whether or not to use turbo P-states.
+ * @turbo_disabled:	Whethet or not turbo P-states are available at all,
+ *			based on the MSR_IA32_MISC_ENABLE value and whether or
+ *			not the maximum reported turbo P-state is different from
+ *			the maximum reported non-turbo one.
+ * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
+ * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
+ *			P-state capacity.
  */
-struct perf_limits {
-	int no_turbo;
-	int turbo_disabled;
+struct global_params {
+	bool no_turbo;
+	bool turbo_disabled;
 	int max_perf_pct;
 	int min_perf_pct;
-	int32_t max_perf;
-	int32_t min_perf;
-	int max_policy_pct;
-	int max_sysfs_pct;
-	int min_policy_pct;
-	int min_sysfs_pct;
 };
 
 /**
@@ -245,9 +231,10 @@ struct perf_limits {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @perf_limits:	Pointer to perf_limit unique to this CPU
- *			Not all field in the structure are applicable
- *			when per cpu controls are enforced
+ * @min_perf:		Minimum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
+ * @max_perf:		Maximum capacity limit as a fraction of the maximum
+ *			turbo P-state capacity.
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -279,7 +266,8 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	struct perf_limits *perf_limits;
+	int32_t	min_perf;
+	int32_t	max_perf;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -324,7 +312,7 @@ struct pstate_adjust_policy {
  * @get_scaling:	Callback to get frequency scaling factor
  * @get_val:		Callback to convert P state to actual MSR write value
  * @get_vid:		Callback to get VID data for Atom platforms
- * @get_target_pstate:	Callback to a function to calculate next P state to use
+ * @update_util:	Active mode utilization update callback.
  *
  * Core and Atom CPU models have different way to get P State limits. This
  * structure is used to store those callbacks.
@@ -337,43 +325,31 @@ struct pstate_funcs {
 	int (*get_scaling)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
-	int32_t (*get_target_pstate)(struct cpudata *);
+	void (*update_util)(struct update_util_data *data, u64 time,
+			    unsigned int flags);
 };
 
-/**
- * struct cpu_defaults- Per CPU model default config data
- * @pid_policy:	PID config data
- * @funcs:		Callback function data
- */
-struct cpu_defaults {
-	struct pstate_adjust_policy pid_policy;
-	struct pstate_funcs funcs;
+static struct pstate_funcs pstate_funcs __read_mostly;
+static struct pstate_adjust_policy pid_params __read_mostly = {
+	.sample_rate_ms = 10,
+	.sample_rate_ns = 10 * NSEC_PER_MSEC,
+	.deadband = 0,
+	.setpoint = 97,
+	.p_gain_pct = 20,
+	.d_gain_pct = 0,
+	.i_gain_pct = 0,
 };
 
-static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
-static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
-
-static struct pstate_adjust_policy pid_params __read_mostly;
-static struct pstate_funcs pstate_funcs __read_mostly;
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
 
-static bool driver_registered __read_mostly;
+static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
 #endif
 
-static struct perf_limits global;
-
-static void intel_pstate_init_limits(struct perf_limits *limits)
-{
-	memset(limits, 0, sizeof(*limits));
-	limits->max_perf_pct = 100;
-	limits->max_perf = int_ext_tofp(1);
-	limits->max_policy_pct = 100;
-	limits->max_sysfs_pct = 100;
-}
+static struct global_params global;
 
 static DEFINE_MUTEX(intel_pstate_driver_lock);
 static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -530,29 +506,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 }
 #endif
 
-static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
-			     int deadband, int integral) {
-	pid->setpoint = int_tofp(setpoint);
-	pid->deadband  = int_tofp(deadband);
-	pid->integral  = int_tofp(integral);
-	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
-}
-
-static inline void pid_p_gain_set(struct _pid *pid, int percent)
-{
-	pid->p_gain = div_fp(percent, 100);
-}
-
-static inline void pid_i_gain_set(struct _pid *pid, int percent)
-{
-	pid->i_gain = div_fp(percent, 100);
-}
-
-static inline void pid_d_gain_set(struct _pid *pid, int percent)
-{
-	pid->d_gain = div_fp(percent, 100);
-}
-
 static signed int pid_calc(struct _pid *pid, int32_t busy)
 {
 	signed int result;
@@ -590,23 +543,17 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
 	return (signed int)fp_toint(result);
 }
 
-static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
-{
-	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
-	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
-	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
-
-	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
-}
-
-static inline void intel_pstate_reset_all_pid(void)
+static inline void intel_pstate_pid_reset(struct cpudata *cpu)
 {
-	unsigned int cpu;
+	struct _pid *pid = &cpu->pid;
 
-	for_each_online_cpu(cpu) {
-		if (all_cpu_data[cpu])
-			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
-	}
+	pid->p_gain = percent_fp(pid_params.p_gain_pct);
+	pid->d_gain = percent_fp(pid_params.d_gain_pct);
+	pid->i_gain = percent_fp(pid_params.i_gain_pct);
+	pid->setpoint = int_tofp(pid_params.setpoint);
+	pid->last_err  = pid->setpoint - int_tofp(100);
+	pid->deadband  = int_tofp(pid_params.deadband);
+	pid->integral  = 0;
 }
 
 static inline void update_turbo_state(void)
@@ -621,6 +568,14 @@ static inline void update_turbo_state(void)
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
+static int min_perf_pct_min(void)
+{
+	struct cpudata *cpu = all_cpu_data[0];
+
+	return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
+			    cpu->pstate.turbo_pstate);
+}
+
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
 {
 	u64 epb;
@@ -838,96 +793,80 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
+static void intel_pstate_hwp_set(unsigned int cpu)
 {
-	int min, hw_min, max, hw_max, cpu;
-	struct perf_limits *perf_limits = &global;
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+	int min, hw_min, max, hw_max;
 	u64 value, cap;
+	s16 epp;
 
-	for_each_cpu(cpu, policy->cpus) {
-		struct cpudata *cpu_data = all_cpu_data[cpu];
-		s16 epp;
-
-		if (per_cpu_limits)
-			perf_limits = all_cpu_data[cpu]->perf_limits;
-
-		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
-		hw_min = HWP_LOWEST_PERF(cap);
-		if (global.no_turbo)
-			hw_max = HWP_GUARANTEED_PERF(cap);
-		else
-			hw_max = HWP_HIGHEST_PERF(cap);
-
-		max = fp_ext_toint(hw_max * perf_limits->max_perf);
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
-			min = max;
-		else
-			min = fp_ext_toint(hw_max * perf_limits->min_perf);
+	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	hw_min = HWP_LOWEST_PERF(cap);
+	if (global.no_turbo)
+		hw_max = HWP_GUARANTEED_PERF(cap);
+	else
+		hw_max = HWP_HIGHEST_PERF(cap);
 
-		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+	max = fp_ext_toint(hw_max * cpu_data->max_perf);
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min = max;
+	else
+		min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
-		value &= ~HWP_MIN_PERF(~0L);
-		value |= HWP_MIN_PERF(min);
+	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
-		value &= ~HWP_MAX_PERF(~0L);
-		value |= HWP_MAX_PERF(max);
+	value &= ~HWP_MIN_PERF(~0L);
+	value |= HWP_MIN_PERF(min);
 
-		if (cpu_data->epp_policy == cpu_data->policy)
-			goto skip_epp;
+	value &= ~HWP_MAX_PERF(~0L);
+	value |= HWP_MAX_PERF(max);
 
-		cpu_data->epp_policy = cpu_data->policy;
+	if (cpu_data->epp_policy == cpu_data->policy)
+		goto skip_epp;
 
-		if (cpu_data->epp_saved >= 0) {
-			epp = cpu_data->epp_saved;
-			cpu_data->epp_saved = -EINVAL;
-			goto update_epp;
-		}
+	cpu_data->epp_policy = cpu_data->policy;
 
-		if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
-			epp = intel_pstate_get_epp(cpu_data, value);
-			cpu_data->epp_powersave = epp;
-			/* If EPP read was failed, then don't try to write */
-			if (epp < 0)
-				goto skip_epp;
+	if (cpu_data->epp_saved >= 0) {
+		epp = cpu_data->epp_saved;
+		cpu_data->epp_saved = -EINVAL;
+		goto update_epp;
+	}
 
+	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = intel_pstate_get_epp(cpu_data, value);
+		cpu_data->epp_powersave = epp;
+		/* If EPP read was failed, then don't try to write */
+		if (epp < 0)
+			goto skip_epp;
 
-			epp = 0;
-		} else {
-			/* skip setting EPP, when saved value is invalid */
-			if (cpu_data->epp_powersave < 0)
-				goto skip_epp;
+		epp = 0;
+	} else {
+		/* skip setting EPP, when saved value is invalid */
+		if (cpu_data->epp_powersave < 0)
+			goto skip_epp;
 
-			/*
-			 * No need to restore EPP when it is not zero. This
-			 * means:
-			 *  - Policy is not changed
-			 *  - user has manually changed
-			 *  - Error reading EPB
-			 */
-			epp = intel_pstate_get_epp(cpu_data, value);
-			if (epp)
-				goto skip_epp;
+		/*
+		 * No need to restore EPP when it is not zero. This
+		 * means:
+		 *  - Policy is not changed
+		 *  - user has manually changed
+		 *  - Error reading EPB
+		 */
+		epp = intel_pstate_get_epp(cpu_data, value);
+		if (epp)
+			goto skip_epp;
 
-			epp = cpu_data->epp_powersave;
-		}
+		epp = cpu_data->epp_powersave;
+	}
 update_epp:
-		if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
-			value &= ~GENMASK_ULL(31, 24);
-			value |= (u64)epp << 24;
-		} else {
-			intel_pstate_set_epb(cpu, epp);
-		}
-skip_epp:
-		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	} else {
+		intel_pstate_set_epb(cpu, epp);
 	}
-}
-
-static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
-{
-	if (hwp_active)
-		intel_pstate_hwp_set(policy);
-
-	return 0;
+skip_epp:
+	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
@@ -944,20 +883,17 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 
 static int intel_pstate_resume(struct cpufreq_policy *policy)
 {
-	int ret;
-
 	if (!hwp_active)
 		return 0;
 
 	mutex_lock(&intel_pstate_limits_lock);
 
 	all_cpu_data[policy->cpu]->epp_policy = 0;
-
-	ret = intel_pstate_hwp_set_policy(policy);
+	intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
-	return ret;
+	return 0;
 }
 
 static void intel_pstate_update_policies(void)
@@ -971,9 +907,14 @@ static void intel_pstate_update_policies(void)
 /************************** debugfs begin ************************/
 static int pid_param_set(void *data, u64 val)
 {
+	unsigned int cpu;
+
 	*(u32 *)data = val;
 	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	intel_pstate_reset_all_pid();
+	for_each_possible_cpu(cpu)
+		if (all_cpu_data[cpu])
+			intel_pstate_pid_reset(all_cpu_data[cpu]);
+
 	return 0;
 }
 
@@ -1084,7 +1025,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1109,7 +1050,7 @@ static ssize_t show_num_pstates(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1129,7 +1070,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1157,7 +1098,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
@@ -1174,6 +1115,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
 	global.no_turbo = clamp_t(int, input, 0, 1);
 
+	if (global.no_turbo) {
+		struct cpudata *cpu = all_cpu_data[0];
+		int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
+
+		/* Squash the global minimum into the permitted range. */
+		if (global.min_perf_pct > pct)
+			global.min_perf_pct = pct;
+	}
+
 	mutex_unlock(&intel_pstate_limits_lock);
 
 	intel_pstate_update_policies();
@@ -1195,18 +1145,14 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
-	global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
-	global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
-	global.max_perf = percent_ext_fp(global.max_perf_pct);
+	global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1229,18 +1175,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 
 	mutex_lock(&intel_pstate_driver_lock);
 
-	if (!driver_registered) {
+	if (!intel_pstate_driver) {
 		mutex_unlock(&intel_pstate_driver_lock);
 		return -EAGAIN;
 	}
 
 	mutex_lock(&intel_pstate_limits_lock);
 
-	global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
-	global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
-	global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
-	global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
-	global.min_perf = percent_ext_fp(global.min_perf_pct);
+	global.min_perf_pct = clamp_t(int, input,
+				      min_perf_pct_min(), global.max_perf_pct);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
@@ -1554,132 +1497,10 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
-static struct cpu_defaults core_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
-	},
-};
-
-static const struct cpu_defaults silvermont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = silvermont_get_scaling,
-		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static const struct cpu_defaults airmont_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = atom_get_max_pstate,
-		.get_max_physical = atom_get_max_pstate,
-		.get_min = atom_get_min_pstate,
-		.get_turbo = atom_get_turbo_pstate,
-		.get_val = atom_get_val,
-		.get_scaling = airmont_get_scaling,
-		.get_vid = atom_get_vid,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static const struct cpu_defaults knl_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 97,
-		.p_gain_pct = 20,
-		.d_gain_pct = 0,
-		.i_gain_pct = 0,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = knl_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_performance,
-	},
-};
-
-static const struct cpu_defaults bxt_params = {
-	.pid_policy = {
-		.sample_rate_ms = 10,
-		.deadband = 0,
-		.setpoint = 60,
-		.p_gain_pct = 14,
-		.d_gain_pct = 0,
-		.i_gain_pct = 4,
-	},
-	.funcs = {
-		.get_max = core_get_max_pstate,
-		.get_max_physical = core_get_max_pstate_physical,
-		.get_min = core_get_min_pstate,
-		.get_turbo = core_get_turbo_pstate,
-		.get_scaling = core_get_scaling,
-		.get_val = core_get_val,
-		.get_target_pstate = get_target_pstate_use_cpu_load,
-	},
-};
-
-static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
+static int intel_pstate_get_base_pstate(struct cpudata *cpu)
 {
-	int max_perf = cpu->pstate.turbo_pstate;
-	int max_perf_adj;
-	int min_perf;
-	struct perf_limits *perf_limits = &global;
-
-	if (global.no_turbo || global.turbo_disabled)
-		max_perf = cpu->pstate.max_pstate;
-
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
-	/*
-	 * performance can be limited by user through sysfs, by cpufreq
-	 * policy, or by cpu specific default values determined through
-	 * experimentation.
-	 */
-	max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
-	*max = clamp_t(int, max_perf_adj,
-			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
-
-	min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
-	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+	return global.no_turbo || global.turbo_disabled ?
+			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
 }
 
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
@@ -1702,11 +1523,13 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
 
 static void intel_pstate_max_within_limits(struct cpudata *cpu)
 {
-	int min_pstate, max_pstate;
+	int pstate;
 
 	update_turbo_state();
-	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
-	intel_pstate_set_pstate(cpu, max_pstate);
+	pstate = intel_pstate_get_base_pstate(cpu);
+	pstate = max(cpu->pstate.min_pstate,
+		     fp_ext_toint(pstate * cpu->max_perf));
+	intel_pstate_set_pstate(cpu, pstate);
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -1767,7 +1590,11 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
 	 * that sample.time will always be reset before setting the utilization
 	 * update hook and make the caller skip the sample then.
 	 */
-	return !!cpu->last_sample_time;
+	if (cpu->last_sample_time) {
+		intel_pstate_calc_avg_perf(cpu);
+		return true;
+	}
+	return false;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
@@ -1788,6 +1615,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	int32_t busy_frac, boost;
 	int target, avg_pstate;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	busy_frac = div_fp(sample->mperf, sample->tsc);
 
 	boost = cpu->iowait_boost;
@@ -1824,6 +1654,9 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
+	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
+		return cpu->pstate.turbo_pstate;
+
 	/*
 	 * perf_scaled is the ratio of the average P-state during the last
 	 * sampling period to the P-state requested last time (in percent).
@@ -1858,11 +1691,13 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 
 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 {
-	int max_perf, min_perf;
+	int max_pstate = intel_pstate_get_base_pstate(cpu);
+	int min_pstate;
 
-	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-	pstate = clamp_t(int, pstate, min_perf, max_perf);
-	return pstate;
+	min_pstate = max(cpu->pstate.min_pstate,
+			 fp_ext_toint(max_pstate * cpu->min_perf));
+	max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf));
+	return clamp_t(int, pstate, min_pstate, max_pstate);
 }
 
 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
@@ -1874,16 +1709,11 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
-static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
+static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
 {
-	int from, target_pstate;
+	int from = cpu->pstate.current_pstate;
 	struct sample *sample;
 
-	from = cpu->pstate.current_pstate;
-
-	target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
-		cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
-
 	update_turbo_state();
 
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
@@ -1902,76 +1732,155 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 		fp_toint(cpu->iowait_boost * 100));
 }
 
+static void intel_pstate_update_util_hwp(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL)
+		intel_pstate_sample(cpu, time);
+}
+
+static void intel_pstate_update_util_pid(struct update_util_data *data,
+					 u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+	u64 delta_ns = time - cpu->sample.time;
+
+	if ((s64)delta_ns < pid_params.sample_rate_ns)
+		return;
+
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
+
+		target_pstate = get_target_pstate_use_performance(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
+	}
+}
+
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 				     unsigned int flags)
 {
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
 	u64 delta_ns;
 
-	if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
-		if (flags & SCHED_CPUFREQ_IOWAIT) {
-			cpu->iowait_boost = int_tofp(1);
-		} else if (cpu->iowait_boost) {
-			/* Clear iowait_boost if the CPU may have been idle. */
-			delta_ns = time - cpu->last_update;
-			if (delta_ns > TICK_NSEC)
-				cpu->iowait_boost = 0;
-		}
-		cpu->last_update = time;
+	if (flags & SCHED_CPUFREQ_IOWAIT) {
+		cpu->iowait_boost = int_tofp(1);
+	} else if (cpu->iowait_boost) {
+		/* Clear iowait_boost if the CPU may have been idle. */
+		delta_ns = time - cpu->last_update;
+		if (delta_ns > TICK_NSEC)
+			cpu->iowait_boost = 0;
 	}
-
+	cpu->last_update = time;
 	delta_ns = time - cpu->sample.time;
-	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
-		bool sample_taken = intel_pstate_sample(cpu, time);
+	if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL)
+		return;
 
-		if (sample_taken) {
-			intel_pstate_calc_avg_perf(cpu);
-			if (!hwp_active)
-				intel_pstate_adjust_busy_pstate(cpu);
-		}
+	if (intel_pstate_sample(cpu, time)) {
+		int target_pstate;
+
+		target_pstate = get_target_pstate_use_cpu_load(cpu);
+		intel_pstate_adjust_pstate(cpu, target_pstate);
 	}
 }
 
+static struct pstate_funcs core_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
+};
+
+static const struct pstate_funcs silvermont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = silvermont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
+};
+
+static const struct pstate_funcs airmont_funcs = {
+	.get_max = atom_get_max_pstate,
+	.get_max_physical = atom_get_max_pstate,
+	.get_min = atom_get_min_pstate,
+	.get_turbo = atom_get_turbo_pstate,
+	.get_val = atom_get_val,
+	.get_scaling = airmont_get_scaling,
+	.get_vid = atom_get_vid,
+	.update_util = intel_pstate_update_util,
+};
+
+static const struct pstate_funcs knl_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = knl_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util_pid,
+};
+
+static const struct pstate_funcs bxt_funcs = {
+	.get_max = core_get_max_pstate,
+	.get_max_physical = core_get_max_pstate_physical,
+	.get_min = core_get_min_pstate,
+	.get_turbo = core_get_turbo_pstate,
+	.get_scaling = core_get_scaling,
+	.get_val = core_get_val,
+	.update_util = intel_pstate_update_util,
+};
+
 #define ICPU(model, policy) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
 			(unsigned long)&policy }
 
 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
-	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_params),
-	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_params),
-	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_X,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_ULT,		core_params),
-	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_params),
-	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_params),
-	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X,		core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_params),
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_params),
-	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_params),
-	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_params),
+	ICPU(INTEL_FAM6_SANDYBRIDGE, 		core_funcs),
+	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	silvermont_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_CORE,		core_funcs),
+	ICPU(INTEL_FAM6_IVYBRIDGE_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_ULT,		core_funcs),
+	ICPU(INTEL_FAM6_HASWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_GT3E,		core_funcs),
+	ICPU(INTEL_FAM6_ATOM_AIRMONT,		airmont_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X,		core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_funcs),
+	ICPU(INTEL_FAM6_XEON_PHI_KNM,		knl_funcs),
+	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_funcs),
+	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,       bxt_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
-	ICPU(INTEL_FAM6_BROADWELL_X, core_params),
-	ICPU(INTEL_FAM6_SKYLAKE_X, core_params),
+	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
+	ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
 	{}
 };
 
 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
-	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params),
+	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
 	{}
 };
 
+static bool pid_in_use(void);
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1979,18 +1888,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	cpu = all_cpu_data[cpunum];
 
 	if (!cpu) {
-		unsigned int size = sizeof(struct cpudata);
-
-		if (per_cpu_limits)
-			size += sizeof(struct perf_limits);
-
-		cpu = kzalloc(size, GFP_KERNEL);
+		cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
 		if (!cpu)
 			return -ENOMEM;
 
 		all_cpu_data[cpunum] = cpu;
-		if (per_cpu_limits)
-			cpu->perf_limits = (struct perf_limits *)(cpu + 1);
 
 		cpu->epp_default = -EINVAL;
 		cpu->epp_powersave = -EINVAL;
@@ -2009,14 +1911,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
-		pid_params.sample_rate_ms = 50;
-		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+	} else if (pid_in_use()) {
+		intel_pstate_pid_reset(cpu);
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
 
-	intel_pstate_busy_pid_reset(cpu);
-
 	pr_debug("controlling: cpu %d\n", cpunum);
 
 	return 0;
@@ -2039,7 +1939,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     pstate_funcs.update_util);
 	cpu->update_util_set = true;
 }
 
@@ -2055,46 +1955,68 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 	synchronize_sched();
 }
 
+static int intel_pstate_get_max_freq(struct cpudata *cpu)
+{
+	return global.turbo_disabled || global.no_turbo ?
+			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+}
+
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
-					    struct perf_limits *limits)
+					    struct cpudata *cpu)
 {
+	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
 
-	max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
+	max_policy_perf = div_ext_fp(policy->max, max_freq);
 	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
 	if (policy->max == policy->min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = div_ext_fp(policy->min,
-					     policy->cpuinfo.max_freq);
+		min_policy_perf = div_ext_fp(policy->min, max_freq);
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
 
 	/* Normalize user input to [min_perf, max_perf] */
-	limits->min_perf = max(min_policy_perf,
-			       percent_ext_fp(limits->min_sysfs_pct));
-	limits->min_perf = min(limits->min_perf, max_policy_perf);
-	limits->max_perf = min(max_policy_perf,
-			       percent_ext_fp(limits->max_sysfs_pct));
-	limits->max_perf = max(min_policy_perf, limits->max_perf);
+	if (per_cpu_limits) {
+		cpu->min_perf = min_policy_perf;
+		cpu->max_perf = max_policy_perf;
+	} else {
+		int32_t global_min, global_max;
+
+		/* Global limits are in percent of the maximum turbo P-state. */
+		global_max = percent_ext_fp(global.max_perf_pct);
+		global_min = percent_ext_fp(global.min_perf_pct);
+		if (max_freq != cpu->pstate.turbo_freq) {
+			int32_t turbo_factor;
+
+			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
+						  cpu->pstate.max_pstate);
+			global_min = mul_ext_fp(global_min, turbo_factor);
+			global_max = mul_ext_fp(global_max, turbo_factor);
+		}
+		global_min = clamp_t(int32_t, global_min, 0, global_max);
+
+		cpu->min_perf = max(min_policy_perf, global_min);
+		cpu->min_perf = min(cpu->min_perf, max_policy_perf);
+		cpu->max_perf = min(max_policy_perf, global_max);
+		cpu->max_perf = max(min_policy_perf, cpu->max_perf);
 
-	/* Make sure min_perf <= max_perf */
-	limits->min_perf = min(limits->min_perf, limits->max_perf);
+		/* Make sure min_perf <= max_perf */
+		cpu->min_perf = min(cpu->min_perf, cpu->max_perf);
+	}
 
-	limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
-	limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
-	limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
-	limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
+	cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS);
+	cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS);
 
 	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 limits->max_perf_pct, limits->min_perf_pct);
+		 fp_ext_toint(cpu->max_perf * 100),
+		 fp_ext_toint(cpu->min_perf * 100));
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	struct perf_limits *perf_limits = &global;
 
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
@@ -2105,19 +2027,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	cpu = all_cpu_data[policy->cpu];
 	cpu->policy = policy->policy;
 
-	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
-	    policy->max < policy->cpuinfo.max_freq &&
-	    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
-		pr_debug("policy->max > max non turbo frequency\n");
-		policy->max = policy->cpuinfo.max_freq;
-	}
-
-	if (per_cpu_limits)
-		perf_limits = cpu->perf_limits;
-
 	mutex_lock(&intel_pstate_limits_lock);
 
-	intel_pstate_update_perf_limits(policy, perf_limits);
+	intel_pstate_update_perf_limits(policy, cpu);
 
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		/*
@@ -2130,38 +2042,38 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 	intel_pstate_set_update_util_hook(policy->cpu);
 
-	intel_pstate_hwp_set_policy(policy);
+	if (hwp_active)
+		intel_pstate_hwp_set(policy->cpu);
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
 	return 0;
 }
 
+static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
+					 struct cpudata *cpu)
+{
+	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
+	    policy->max < policy->cpuinfo.max_freq &&
+	    policy->max > cpu->pstate.max_freq) {
+		pr_debug("policy->max > max non turbo frequency\n");
+		policy->max = policy->cpuinfo.max_freq;
+	}
+}
+
 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
-					cpu->pstate.max_freq :
-					cpu->pstate.turbo_freq;
-
-	cpufreq_verify_within_cpu_limits(policy);
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
 	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
 		return -EINVAL;
 
-	/* When per-CPU limits are used, sysfs limits are not used */
-	if (!per_cpu_limits) {
-		unsigned int max_freq, min_freq;
-
-		max_freq = policy->cpuinfo.max_freq *
-					global.max_sysfs_pct / 100;
-		min_freq = policy->cpuinfo.max_freq *
-					global.min_sysfs_pct / 100;
-		cpufreq_verify_within_limits(policy, min_freq, max_freq);
-	}
+	intel_pstate_adjust_policy_max(policy, cpu);
 
 	return 0;
 }
@@ -2202,8 +2114,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	if (per_cpu_limits)
-		intel_pstate_init_limits(cpu->perf_limits);
+	cpu->max_perf = int_ext_tofp(1);
+	cpu->min_perf = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2257,10 +2169,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
 	update_turbo_state();
-	policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
-			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     intel_pstate_get_max_freq(cpu));
 
-	cpufreq_verify_within_cpu_limits(policy);
+	intel_pstate_adjust_policy_max(policy, cpu);
+
+	intel_pstate_update_perf_limits(policy, cpu);
 
 	return 0;
 }
@@ -2324,6 +2238,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return ret;
 
 	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
+	policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
 	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
 	policy->cur = policy->cpuinfo.min_freq;
 
@@ -2341,7 +2256,13 @@ static struct cpufreq_driver intel_cpufreq = {
 	.name		= "intel_cpufreq",
 };
 
-static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+static struct cpufreq_driver *default_driver = &intel_pstate;
+
+static bool pid_in_use(void)
+{
+	return intel_pstate_driver == &intel_pstate &&
+		pstate_funcs.update_util == intel_pstate_update_util_pid;
+}
 
 static void intel_pstate_driver_cleanup(void)
 {
@@ -2358,26 +2279,26 @@ static void intel_pstate_driver_cleanup(void)
 		}
 	}
 	put_online_cpus();
+	intel_pstate_driver = NULL;
 }
 
-static int intel_pstate_register_driver(void)
+static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 {
 	int ret;
 
-	intel_pstate_init_limits(&global);
+	memset(&global, 0, sizeof(global));
+	global.max_perf_pct = 100;
 
+	intel_pstate_driver = driver;
 	ret = cpufreq_register_driver(intel_pstate_driver);
 	if (ret) {
 		intel_pstate_driver_cleanup();
 		return ret;
 	}
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = true;
-	mutex_unlock(&intel_pstate_limits_lock);
+	global.min_perf_pct = min_perf_pct_min();
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	if (pid_in_use())
 		intel_pstate_debug_expose_params();
 
 	return 0;
@@ -2388,14 +2309,9 @@ static int intel_pstate_unregister_driver(void)
 	if (hwp_active)
 		return -EBUSY;
 
-	if (intel_pstate_driver == &intel_pstate && !hwp_active &&
-	    pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+	if (pid_in_use())
 		intel_pstate_debug_hide_params();
 
-	mutex_lock(&intel_pstate_limits_lock);
-	driver_registered = false;
-	mutex_unlock(&intel_pstate_limits_lock);
-
 	cpufreq_unregister_driver(intel_pstate_driver);
 	intel_pstate_driver_cleanup();
 
@@ -2404,7 +2320,7 @@ static int intel_pstate_unregister_driver(void)
 
 static ssize_t intel_pstate_show_status(char *buf)
 {
-	if (!driver_registered)
+	if (!intel_pstate_driver)
 		return sprintf(buf, "off\n");
 
 	return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
@@ -2416,11 +2332,11 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 	int ret;
 
 	if (size == 3 && !strncmp(buf, "off", size))
-		return driver_registered ?
+		return intel_pstate_driver ?
 			intel_pstate_unregister_driver() : -EINVAL;
 
 	if (size == 6 && !strncmp(buf, "active", size)) {
-		if (driver_registered) {
+		if (intel_pstate_driver) {
 			if (intel_pstate_driver == &intel_pstate)
 				return 0;
 
@@ -2429,13 +2345,12 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_pstate;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_pstate);
 	}
 
 	if (size == 7 && !strncmp(buf, "passive", size)) {
-		if (driver_registered) {
-			if (intel_pstate_driver != &intel_pstate)
+		if (intel_pstate_driver) {
+			if (intel_pstate_driver == &intel_cpufreq)
 				return 0;
 
 			ret = intel_pstate_unregister_driver();
@@ -2443,8 +2358,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 				return ret;
 		}
 
-		intel_pstate_driver = &intel_cpufreq;
-		return intel_pstate_register_driver();
+		return intel_pstate_register_driver(&intel_cpufreq);
 	}
 
 	return -EINVAL;
@@ -2465,23 +2379,17 @@ static int __init intel_pstate_msrs_not_valid(void)
 	return 0;
 }
 
-static void __init copy_pid_params(struct pstate_adjust_policy *policy)
-{
-	pid_params.sample_rate_ms = policy->sample_rate_ms;
-	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-	pid_params.p_gain_pct = policy->p_gain_pct;
-	pid_params.i_gain_pct = policy->i_gain_pct;
-	pid_params.d_gain_pct = policy->d_gain_pct;
-	pid_params.deadband = policy->deadband;
-	pid_params.setpoint = policy->setpoint;
-}
-
 #ifdef CONFIG_ACPI
 static void intel_pstate_use_acpi_profile(void)
 {
-	if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
-		pstate_funcs.get_target_pstate =
-				get_target_pstate_use_cpu_load;
+	switch (acpi_gbl_FADT.preferred_profile) {
+	case PM_MOBILE:
+	case PM_TABLET:
+	case PM_APPLIANCE_PC:
+	case PM_DESKTOP:
+	case PM_WORKSTATION:
+		pstate_funcs.update_util = intel_pstate_update_util;
+	}
 }
 #else
 static void intel_pstate_use_acpi_profile(void)
@@ -2498,7 +2406,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_scaling = funcs->get_scaling;
 	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
-	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
+	pstate_funcs.update_util = funcs->update_util;
 
 	intel_pstate_use_acpi_profile();
 }
@@ -2637,28 +2545,30 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 
 static int __init intel_pstate_init(void)
 {
-	const struct x86_cpu_id *id;
-	struct cpu_defaults *cpu_def;
-	int rc = 0;
+	int rc;
 
 	if (no_load)
 		return -ENODEV;
 
-	if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
-		copy_cpu_funcs(&core_params.funcs);
-		hwp_active++;
-		intel_pstate.attr = hwp_cpufreq_attrs;
-		goto hwp_cpu_matched;
-	}
-
-	id = x86_match_cpu(intel_pstate_cpu_ids);
-	if (!id)
-		return -ENODEV;
+	if (x86_match_cpu(hwp_support_ids)) {
+		copy_cpu_funcs(&core_funcs);
+		if (no_hwp) {
+			pstate_funcs.update_util = intel_pstate_update_util;
+		} else {
+			hwp_active++;
+			intel_pstate.attr = hwp_cpufreq_attrs;
+			pstate_funcs.update_util = intel_pstate_update_util_hwp;
+			goto hwp_cpu_matched;
+		}
+	} else {
+		const struct x86_cpu_id *id;
 
-	cpu_def = (struct cpu_defaults *)id->driver_data;
+		id = x86_match_cpu(intel_pstate_cpu_ids);
+		if (!id)
+			return -ENODEV;
 
-	copy_pid_params(&cpu_def->pid_policy);
-	copy_cpu_funcs(&cpu_def->funcs);
+		copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
+	}
 
 	if (intel_pstate_msrs_not_valid())
 		return -ENODEV;
@@ -2685,7 +2595,7 @@ hwp_cpu_matched:
 	intel_pstate_sysfs_expose_params();
 
 	mutex_lock(&intel_pstate_driver_lock);
-	rc = intel_pstate_register_driver();
+	rc = intel_pstate_register_driver(default_driver);
 	mutex_unlock(&intel_pstate_driver_lock);
 	if (rc)
 		return rc;
@@ -2706,7 +2616,7 @@ static int __init intel_pstate_setup(char *str)
 		no_load = 1;
 	} else if (!strcmp(str, "passive")) {
 		pr_info("Passive mode enabled\n");
-		intel_pstate_driver = &intel_cpufreq;
+		default_driver = &intel_cpufreq;
 		no_hwp = 1;
 	}
 	if (!strcmp(str, "no_hwp")) {
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index ab25b1235a5e..fd1886faf33a 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -573,14 +573,33 @@ static struct platform_driver mt8173_cpufreq_platdrv = {
 	.probe		= mt8173_cpufreq_probe,
 };
 
-static int mt8173_cpufreq_driver_init(void)
+/* List of machines supported by this driver */
+static const struct of_device_id mt8173_cpufreq_machines[] __initconst = {
+	{ .compatible = "mediatek,mt817x", },
+	{ .compatible = "mediatek,mt8173", },
+	{ .compatible = "mediatek,mt8176", },
+
+	{ }
+};
+
+static int __init mt8173_cpufreq_driver_init(void)
 {
+	struct device_node *np;
+	const struct of_device_id *match;
 	struct platform_device *pdev;
 	int err;
 
-	if (!of_machine_is_compatible("mediatek,mt8173"))
+	np = of_find_node_by_path("/");
+	if (!np)
 		return -ENODEV;
 
+	match = of_match_node(mt8173_cpufreq_machines, np);
+	of_node_put(np);
+	if (!match) {
+		pr_warn("Machine is not compatible with mt8173-cpufreq\n");
+		return -ENODEV;
+	}
+
 	err = platform_driver_register(&mt8173_cpufreq_platdrv);
 	if (err)
 		return err;
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index bfec1bcd3835..e2ea433a5f9c 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -52,17 +52,27 @@ static u32 get_bus_freq(void)
 {
 	struct device_node *soc;
 	u32 sysfreq;
+	struct clk *pltclk;
+	int ret;
 
+	/* get platform freq by searching bus-frequency property */
 	soc = of_find_node_by_type(NULL, "soc");
-	if (!soc)
-		return 0;
-
-	if (of_property_read_u32(soc, "bus-frequency", &sysfreq))
-		sysfreq = 0;
+	if (soc) {
+		ret = of_property_read_u32(soc, "bus-frequency", &sysfreq);
+		of_node_put(soc);
+		if (!ret)
+			return sysfreq;
+	}
 
-	of_node_put(soc);
+	/* get platform freq by its clock name */
+	pltclk = clk_get(NULL, "cg-pll0-div1");
+	if (IS_ERR(pltclk)) {
+		pr_err("%s: can't get bus frequency %ld\n",
+		       __func__, PTR_ERR(pltclk));
+		return PTR_ERR(pltclk);
+	}
 
-	return sysfreq;
+	return clk_get_rate(pltclk);
 }
 
 static struct clk *cpu_to_clk(int cpu)
diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
new file mode 100644
index 000000000000..fe7875311d62
--- /dev/null
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
+#define EDVD_CORE_VOLT_FREQ(core)		(0x20 + (core) * 0x4)
+#define EDVD_CORE_VOLT_FREQ_F_SHIFT		0
+#define EDVD_CORE_VOLT_FREQ_V_SHIFT		16
+
+struct tegra186_cpufreq_cluster_info {
+	unsigned long offset;
+	int cpus[4];
+	unsigned int bpmp_cluster_id;
+};
+
+#define NO_CPU -1
+static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = {
+	/* Denver cluster */
+	{
+		.offset = SZ_64K * 7,
+		.cpus = { 1, 2, NO_CPU, NO_CPU },
+		.bpmp_cluster_id = 0,
+	},
+	/* A57 cluster */
+	{
+		.offset = SZ_64K * 6,
+		.cpus = { 0, 3, 4, 5 },
+		.bpmp_cluster_id = 1,
+	},
+};
+
+struct tegra186_cpufreq_cluster {
+	const struct tegra186_cpufreq_cluster_info *info;
+	struct cpufreq_frequency_table *table;
+};
+
+struct tegra186_cpufreq_data {
+	void __iomem *regs;
+
+	size_t num_clusters;
+	struct tegra186_cpufreq_cluster *clusters;
+};
+
+static int tegra186_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
+	unsigned int i;
+
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+		const struct tegra186_cpufreq_cluster_info *info =
+			cluster->info;
+		int core;
+
+		for (core = 0; core < ARRAY_SIZE(info->cpus); core++) {
+			if (info->cpus[core] == policy->cpu)
+				break;
+		}
+		if (core == ARRAY_SIZE(info->cpus))
+			continue;
+
+		policy->driver_data =
+			data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core);
+		cpufreq_table_validate_and_show(policy, cluster->table);
+	}
+
+	policy->cpuinfo.transition_latency = 300 * 1000;
+
+	return 0;
+}
+
+static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy,
+				       unsigned int index)
+{
+	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
+	void __iomem *edvd_reg = policy->driver_data;
+	u32 edvd_val = tbl->driver_data;
+
+	writel(edvd_val, edvd_reg);
+
+	return 0;
+}
+
+static struct cpufreq_driver tegra186_cpufreq_driver = {
+	.name = "tegra186",
+	.flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = tegra186_cpufreq_set_target,
+	.init = tegra186_cpufreq_init,
+	.attr = cpufreq_generic_attr,
+};
+
+static struct cpufreq_frequency_table *init_vhint_table(
+	struct platform_device *pdev, struct tegra_bpmp *bpmp,
+	unsigned int cluster_id)
+{
+	struct cpufreq_frequency_table *table;
+	struct mrq_cpu_vhint_request req;
+	struct tegra_bpmp_message msg;
+	struct cpu_vhint_data *data;
+	int err, i, j, num_rates = 0;
+	dma_addr_t phys;
+	void *virt;
+
+	virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys,
+				  GFP_KERNEL | GFP_DMA32);
+	if (!virt)
+		return ERR_PTR(-ENOMEM);
+
+	data = (struct cpu_vhint_data *)virt;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = phys;
+	req.cluster_id = cluster_id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_CPU_VHINT;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err) {
+		table = ERR_PTR(err);
+		goto free;
+	}
+
+	for (i = data->vfloor; i <= data->vceil; i++) {
+		u16 ndiv = data->ndiv[i];
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		num_rates++;
+	}
+
+	table = devm_kcalloc(&pdev->dev, num_rates + 1, sizeof(*table),
+			     GFP_KERNEL);
+	if (!table) {
+		table = ERR_PTR(-ENOMEM);
+		goto free;
+	}
+
+	for (i = data->vfloor, j = 0; i <= data->vceil; i++) {
+		struct cpufreq_frequency_table *point;
+		u16 ndiv = data->ndiv[i];
+		u32 edvd_val = 0;
+
+		if (ndiv < data->ndiv_min || ndiv > data->ndiv_max)
+			continue;
+
+		/* Only store lowest voltage index for each rate */
+		if (i > 0 && ndiv == data->ndiv[i - 1])
+			continue;
+
+		edvd_val |= i << EDVD_CORE_VOLT_FREQ_V_SHIFT;
+		edvd_val |= ndiv << EDVD_CORE_VOLT_FREQ_F_SHIFT;
+
+		point = &table[j++];
+		point->driver_data = edvd_val;
+		point->frequency = data->ref_clk_hz * ndiv / data->pdiv /
+			data->mdiv / 1000;
+	}
+
+	table[j].frequency = CPUFREQ_TABLE_END;
+
+free:
+	dma_free_coherent(bpmp->dev, sizeof(*data), virt, phys);
+
+	return table;
+}
+
+static int tegra186_cpufreq_probe(struct platform_device *pdev)
+{
+	struct tegra186_cpufreq_data *data;
+	struct tegra_bpmp *bpmp;
+	struct resource *res;
+	unsigned int i = 0, err;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters),
+				      sizeof(*data->clusters), GFP_KERNEL);
+	if (!data->clusters)
+		return -ENOMEM;
+
+	data->num_clusters = ARRAY_SIZE(tegra186_clusters);
+
+	bpmp = tegra_bpmp_get(&pdev->dev);
+	if (IS_ERR(bpmp))
+		return PTR_ERR(bpmp);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs)) {
+		err = PTR_ERR(data->regs);
+		goto put_bpmp;
+	}
+
+	for (i = 0; i < data->num_clusters; i++) {
+		struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+
+		cluster->info = &tegra186_clusters[i];
+		cluster->table = init_vhint_table(
+			pdev, bpmp, cluster->info->bpmp_cluster_id);
+		if (IS_ERR(cluster->table)) {
+			err = PTR_ERR(cluster->table);
+			goto put_bpmp;
+		}
+	}
+
+	tegra_bpmp_put(bpmp);
+
+	tegra186_cpufreq_driver.driver_data = data;
+
+	err = cpufreq_register_driver(&tegra186_cpufreq_driver);
+	if (err)
+		return err;
+
+	return 0;
+
+put_bpmp:
+	tegra_bpmp_put(bpmp);
+
+	return err;
+}
+
+static int tegra186_cpufreq_remove(struct platform_device *pdev)
+{
+	cpufreq_unregister_driver(&tegra186_cpufreq_driver);
+
+	return 0;
+}
+
+static const struct of_device_id tegra186_cpufreq_of_match[] = {
+	{ .compatible = "nvidia,tegra186-ccplex-cluster", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tegra186_cpufreq_of_match);
+
+static struct platform_driver tegra186_cpufreq_platform_driver = {
+	.driver = {
+		.name = "tegra186-cpufreq",
+		.of_match_table = tegra186_cpufreq_of_match,
+	},
+	.probe = tegra186_cpufreq_probe,
+	.remove = tegra186_cpufreq_remove,
+};
+module_platform_driver(tegra186_cpufreq_platform_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra186 cpufreq driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c
index 926ba9871c62..12b9145913de 100644
--- a/drivers/cpuidle/cpuidle-cps.c
+++ b/drivers/cpuidle/cpuidle-cps.c
@@ -118,7 +118,7 @@ static void __init cps_cpuidle_unregister(void)
 
 static int __init cps_cpuidle_init(void)
 {
-	int err, cpu, core, i;
+	int err, cpu, i;
 	struct cpuidle_device *device;
 
 	/* Detect supported states */
@@ -160,7 +160,6 @@ static int __init cps_cpuidle_init(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		core = cpu_data[cpu].core;
 		device = &per_cpu(cpuidle_dev, cpu);
 		device->cpu = cpu;
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index cda8f62d555b..12409a519cc5 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -56,10 +56,9 @@ static int snooze_loop(struct cpuidle_device *dev,
 
 	snooze_exit_time = get_tb() + snooze_timeout;
 	ppc64_runlatch_off();
+	HMT_very_low();
 	while (!need_resched()) {
-		HMT_low();
-		HMT_very_low();
-		if (snooze_timeout_en && get_tb() > snooze_exit_time)
+		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time)
 			break;
 	}
 
@@ -215,11 +214,25 @@ static inline void add_powernv_state(int index, const char *name,
 	stop_psscr_table[index].mask = psscr_mask;
 }
 
+/*
+ * Returns 0 if prop1_len == prop2_len. Else returns -1
+ */
+static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
+					 const char *prop2, int prop2_len)
+{
+	if (prop1_len == prop2_len)
+		return 0;
+
+	pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
+		prop1, prop2);
+	return -1;
+}
+
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
 	int nr_idle_states = 1; /* Snooze */
-	int dt_idle_states;
+	int dt_idle_states, count;
 	u32 latency_ns[CPUIDLE_STATE_MAX];
 	u32 residency_ns[CPUIDLE_STATE_MAX];
 	u32 flags[CPUIDLE_STATE_MAX];
@@ -244,6 +257,21 @@ static int powernv_add_idle_states(void)
 		goto out;
 	}
 
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-latencies-ns");
+
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-latencies-ns",
+				   count) != 0)
+		goto out;
+
+	count = of_property_count_strings(power_mgt,
+					  "ibm,cpu-idle-state-names");
+	if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
+				   "ibm,cpu-idle-state-names",
+				   count) != 0)
+		goto out;
+
 	/*
 	 * Since snooze is used as first idle state, max idle states allowed is
 	 * CPUIDLE_STATE_MAX -1
@@ -278,6 +306,22 @@ static int powernv_add_idle_states(void)
 	has_stop_states = (flags[0] &
 			   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
 	if (has_stop_states) {
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr",
+					   count) != 0)
+			goto out;
+
+		count = of_property_count_u64_elems(power_mgt,
+						    "ibm,cpu-idle-state-psscr-mask");
+		if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					   dt_idle_states,
+					   "ibm,cpu-idle-state-psscr-mask",
+					   count) != 0)
+			goto out;
+
 		if (of_property_read_u64_array(power_mgt,
 		    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
 			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
@@ -292,8 +336,21 @@ static int powernv_add_idle_states(void)
 		}
 	}
 
-	rc = of_property_read_u32_array(power_mgt,
-		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
+	count = of_property_count_u32_elems(power_mgt,
+					    "ibm,cpu-idle-state-residency-ns");
+
+	if (count < 0) {
+		rc = count;
+	} else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
+					  dt_idle_states,
+					  "ibm,cpu-idle-state-residency-ns",
+					  count) != 0) {
+		goto out;
+	} else {
+		rc = of_property_read_u32_array(power_mgt,
+						"ibm,cpu-idle-state-residency-ns",
+						residency_ns, dt_idle_states);
+	}
 
 	for (i = 0; i < dt_idle_states; i++) {
 		unsigned int exit_latency, target_residency;
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 71576b8bdfef..a4f2fa1091e4 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -25,6 +25,35 @@
 #define DEVFREQ_GOV_SUSPEND			0x4
 #define DEVFREQ_GOV_RESUME			0x5
 
+/**
+ * struct devfreq_governor - Devfreq policy governor
+ * @node:		list node - contains registered devfreq governors
+ * @name:		Governor's name
+ * @immutable:		Immutable flag for governor. If the value is 1,
+ *			this govenror is never changeable to other governor.
+ * @get_target_freq:	Returns desired operating frequency for the device.
+ *			Basically, get_target_freq will run
+ *			devfreq_dev_profile.get_dev_status() to get the
+ *			status of the device (load = busy_time / total_time).
+ *			If no_central_polling is set, this callback is called
+ *			only with update_devfreq() notified by OPP.
+ * @event_handler:      Callback for devfreq core framework to notify events
+ *                      to governors. Events include per device governor
+ *                      init and exit, opp changes out of devfreq, suspend
+ *                      and resume of per device devfreq during device idle.
+ *
+ * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ */
+struct devfreq_governor {
+	struct list_head node;
+
+	const char name[DEVFREQ_NAME_LEN];
+	const unsigned int immutable;
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+	int (*event_handler)(struct devfreq *devfreq,
+				unsigned int event, void *data);
+};
+
 /* Caution: devfreq->lock must be locked before calling update_devfreq */
 extern int update_devfreq(struct devfreq *devfreq);
 
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 4773f2867234..96afb2aeed18 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -10,26 +10,16 @@ config EDAC_SUPPORT
 	bool
 
 menuconfig EDAC
-	bool "EDAC (Error Detection And Correction) reporting"
-	depends on HAS_IOMEM && EDAC_SUPPORT
+	tristate "EDAC (Error Detection And Correction) reporting"
+	depends on HAS_IOMEM && EDAC_SUPPORT && RAS
 	help
-	  EDAC is designed to report errors in the core system.
-	  These are low-level errors that are reported in the CPU or
-	  supporting chipset or other subsystems:
+	  EDAC is a subsystem along with hardware-specific drivers designed to
+	  report hardware errors. These are low-level errors that are reported
+	  in the CPU or supporting chipset or other subsystems:
 	  memory errors, cache errors, PCI errors, thermal throttling, etc..
 	  If unsure, select 'Y'.
 
-	  If this code is reporting problems on your system, please
-	  see the EDAC project web pages for more information at:
-
-	  <http://bluesmoke.sourceforge.net/>
-
-	  and:
-
-	  <http://buttersideup.com/edacwiki>
-
-	  There is also a mailing list for the EDAC project, which can
-	  be found via the sourceforge page.
+	  The mailing list for the EDAC project is linux-edac@vger.kernel.org.
 
 if EDAC
 
@@ -62,21 +52,9 @@ config EDAC_DECODE_MCE
 	  which occur really early upon boot, before the module infrastructure
 	  has been initialized.
 
-config EDAC_MM_EDAC
-	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
-	select RAS
-	help
-	  Some systems are able to detect and correct errors in main
-	  memory.  EDAC can report statistics on memory error
-	  detection and correction (EDAC - or commonly referred to ECC
-	  errors).  EDAC will also try to decode where these errors
-	  occurred so that a particular failing memory module can be
-	  replaced.  If unsure, select 'Y'.
-
 config EDAC_GHES
 	bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
-	depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
-	default y
+	depends on ACPI_APEI_GHES && (EDAC=y)
 	help
 	  Not all machines support hardware-driven error report. Some of those
 	  provide a BIOS-driven error report mechanism via ACPI, using the
@@ -98,7 +76,7 @@ config EDAC_GHES
 
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64)"
-	depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
+	depends on AMD_NB && EDAC_DECODE_MCE
 	help
 	  Support for error detection and correction of DRAM ECC errors on
 	  the AMD64 families (>= K8) of memory controllers.
@@ -124,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION
 
 config EDAC_AMD76X
 	tristate "AMD 76x (760, 762, 768)"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	help
 	  Support for error detection and correction on the AMD 76x
 	  series of chipsets used with the Athlon processor.
 
 config EDAC_E7XXX
 	tristate "Intel e7xxx (e7205, e7500, e7501, e7505)"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	help
 	  Support for error detection and correction on the Intel
 	  E7205, E7500, E7501 and E7505 server chipsets.
 
 config EDAC_E752X
 	tristate "Intel e752x (e7520, e7525, e7320) and 3100"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  E7520, E7525, E7320 server chipsets.
 
 config EDAC_I82443BXGX
 	tristate "Intel 82443BX/GX (440BX/GX)"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	depends on BROKEN
 	help
 	  Support for error detection and correction on the Intel
@@ -153,56 +131,56 @@ config EDAC_I82443BXGX
 
 config EDAC_I82875P
 	tristate "Intel 82875p (D82875P, E7210)"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	help
 	  Support for error detection and correction on the Intel
 	  DP82785P and E7210 server chipsets.
 
 config EDAC_I82975X
 	tristate "Intel 82975x (D82975x)"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  DP82975x server chipsets.
 
 config EDAC_I3000
 	tristate "Intel 3000/3010"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  3000 and 3010 server chipsets.
 
 config EDAC_I3200
 	tristate "Intel 3200"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  3200 and 3210 server chipsets.
 
 config EDAC_IE31200
 	tristate "Intel e312xx"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  E3-1200 based DRAM controllers.
 
 config EDAC_X38
 	tristate "Intel X38"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction on the Intel
 	  X38 server chipsets.
 
 config EDAC_I5400
 	tristate "Intel 5400 (Seaburg) chipsets"
-	depends on EDAC_MM_EDAC && PCI && X86
+	depends on PCI && X86
 	help
 	  Support for error detection and correction the Intel
 	  i5400 MCH chipset (Seaburg).
 
 config EDAC_I7CORE
 	tristate "Intel i7 Core (Nehalem) processors"
-	depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
+	depends on PCI && X86 && X86_MCE_INTEL
 	help
 	  Support for error detection and correction the Intel
 	  i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -211,58 +189,56 @@ config EDAC_I7CORE
 
 config EDAC_I82860
 	tristate "Intel 82860"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	help
 	  Support for error detection and correction on the Intel
 	  82860 chipset.
 
 config EDAC_R82600
 	tristate "Radisys 82600 embedded chipset"
-	depends on EDAC_MM_EDAC && PCI && X86_32
+	depends on PCI && X86_32
 	help
 	  Support for error detection and correction on the Radisys
 	  82600 embedded chipset.
 
 config EDAC_I5000
 	tristate "Intel Greencreek/Blackford chipset"
-	depends on EDAC_MM_EDAC && X86 && PCI
+	depends on X86 && PCI
 	help
 	  Support for error detection and correction the Intel
 	  Greekcreek/Blackford chipsets.
 
 config EDAC_I5100
 	tristate "Intel San Clemente MCH"
-	depends on EDAC_MM_EDAC && X86 && PCI
+	depends on X86 && PCI
 	help
 	  Support for error detection and correction the Intel
 	  San Clemente MCH.
 
 config EDAC_I7300
 	tristate "Intel Clarksboro MCH"
-	depends on EDAC_MM_EDAC && X86 && PCI
+	depends on X86 && PCI
 	help
 	  Support for error detection and correction the Intel
 	  Clarksboro MCH (Intel 7300 chipset).
 
 config EDAC_SBRIDGE
 	tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC"
-	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
-	depends on PCI_MMCONFIG
+	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
 	help
 	  Support for error detection and correction the Intel
 	  Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
 config EDAC_SKX
 	tristate "Intel Skylake server Integrated MC"
-	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
-	depends on PCI_MMCONFIG
+	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
 	help
 	  Support for error detection and correction the Intel
 	  Skylake server Integrated Memory Controllers.
 
 config EDAC_PND2
 	tristate "Intel Pondicherry2"
-	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+	depends on PCI && X86_64 && X86_MCE_INTEL
 	help
 	  Support for error detection and correction on the Intel
 	  Pondicherry2 Integrated Memory Controller. This SoC IP is
@@ -271,36 +247,35 @@ config EDAC_PND2
 
 config EDAC_MPC85XX
 	tristate "Freescale MPC83xx / MPC85xx"
-	depends on EDAC_MM_EDAC && FSL_SOC
+	depends on FSL_SOC
 	help
 	  Support for error detection and correction on the Freescale
 	  MPC8349, MPC8560, MPC8540, MPC8548, T4240
 
 config EDAC_LAYERSCAPE
 	tristate "Freescale Layerscape DDR"
-	depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE
+	depends on ARCH_LAYERSCAPE
 	help
 	  Support for error detection and correction on Freescale memory
 	  controllers on Layerscape SoCs.
 
 config EDAC_MV64X60
 	tristate "Marvell MV64x60"
-	depends on EDAC_MM_EDAC && MV64X60
+	depends on MV64X60
 	help
 	  Support for error detection and correction on the Marvell
 	  MV64360 and MV64460 chipsets.
 
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
-	depends on EDAC_MM_EDAC && PCI
-	depends on PPC_PASEMI
+	depends on PPC_PASEMI && PCI
 	help
 	  Support for error detection and correction on PA Semi
 	  PWRficient.
 
 config EDAC_CELL
 	tristate "Cell Broadband Engine memory controller"
-	depends on EDAC_MM_EDAC && PPC_CELL_COMMON
+	depends on PPC_CELL_COMMON
 	help
 	  Support for error detection and correction on the
 	  Cell Broadband Engine internal memory controller
@@ -308,7 +283,7 @@ config EDAC_CELL
 
 config EDAC_PPC4XX
 	tristate "PPC4xx IBM DDR2 Memory Controller"
-	depends on EDAC_MM_EDAC && 4xx
+	depends on 4xx
 	help
 	  This enables support for EDAC on the ECC memory used
 	  with the IBM DDR2 memory controller found in various
@@ -317,7 +292,7 @@ config EDAC_PPC4XX
 
 config EDAC_AMD8131
 	tristate "AMD8131 HyperTransport PCI-X Tunnel"
-	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
+	depends on PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8131 HyperTransport PCI-X Tunnel chip.
@@ -326,7 +301,7 @@ config EDAC_AMD8131
 
 config EDAC_AMD8111
 	tristate "AMD8111 HyperTransport I/O Hub"
-	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
+	depends on PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8111 HyperTransport I/O Hub chip.
@@ -335,7 +310,7 @@ config EDAC_AMD8111
 
 config EDAC_CPC925
 	tristate "IBM CPC925 Memory Controller (PPC970FX)"
-	depends on EDAC_MM_EDAC && PPC64
+	depends on PPC64
 	help
 	  Support for error detection and correction on the
 	  IBM CPC925 Bridge and Memory Controller, which is
@@ -344,7 +319,7 @@ config EDAC_CPC925
 
 config EDAC_TILE
 	tristate "Tilera Memory Controller"
-	depends on EDAC_MM_EDAC && TILE
+	depends on TILE
 	default y
 	help
 	  Support for error detection and correction on the
@@ -352,49 +327,59 @@ config EDAC_TILE
 
 config EDAC_HIGHBANK_MC
 	tristate "Highbank Memory Controller"
-	depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+	depends on ARCH_HIGHBANK
 	help
 	  Support for error detection and correction on the
 	  Calxeda Highbank memory controller.
 
 config EDAC_HIGHBANK_L2
 	tristate "Highbank L2 Cache"
-	depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+	depends on ARCH_HIGHBANK
 	help
 	  Support for error detection and correction on the
 	  Calxeda Highbank memory controller.
 
 config EDAC_OCTEON_PC
 	tristate "Cavium Octeon Primary Caches"
-	depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+	depends on CPU_CAVIUM_OCTEON
 	help
 	  Support for error detection and correction on the primary caches of
 	  the cnMIPS cores of Cavium Octeon family SOCs.
 
 config EDAC_OCTEON_L2C
 	tristate "Cavium Octeon Secondary Caches (L2C)"
-	depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+	depends on CAVIUM_OCTEON_SOC
 	help
 	  Support for error detection and correction on the
 	  Cavium Octeon family of SOCs.
 
 config EDAC_OCTEON_LMC
 	tristate "Cavium Octeon DRAM Memory Controller (LMC)"
-	depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+	depends on CAVIUM_OCTEON_SOC
 	help
 	  Support for error detection and correction on the
 	  Cavium Octeon family of SOCs.
 
 config EDAC_OCTEON_PCI
 	tristate "Cavium Octeon PCI Controller"
-	depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC
+	depends on PCI && CAVIUM_OCTEON_SOC
 	help
 	  Support for error detection and correction on the
 	  Cavium Octeon family of SOCs.
 
+config EDAC_THUNDERX
+	tristate "Cavium ThunderX EDAC"
+	depends on ARM64
+	depends on PCI
+	help
+	  Support for error detection and correction on the
+	  Cavium ThunderX memory controllers (LMC), Cache
+	  Coherent Processor Interconnect (CCPI) and L2 cache
+	  blocks (TAD, CBC, MCI).
+
 config EDAC_ALTERA
 	bool "Altera SOCFPGA ECC"
-	depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
+	depends on EDAC=y && ARCH_SOCFPGA
 	help
 	  Support for error detection and correction on the
 	  Altera SOCs. This must be selected for SDRAM ECC.
@@ -460,14 +445,14 @@ config EDAC_ALTERA_SDMMC
 
 config EDAC_SYNOPSYS
 	tristate "Synopsys DDR Memory Controller"
-	depends on EDAC_MM_EDAC && ARCH_ZYNQ
+	depends on ARCH_ZYNQ
 	help
 	  Support for error detection and correction on the Synopsys DDR
 	  memory controller.
 
 config EDAC_XGENE
 	tristate "APM X-Gene SoC"
-	depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST)
+	depends on (ARM64 || COMPILE_TEST)
 	help
 	  Support for error detection and correction on the
 	  APM X-Gene family of SOCs.
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 587107e90996..0fd9ffa63299 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -6,8 +6,7 @@
 # GNU General Public License.
 #
 
-obj-$(CONFIG_EDAC)			:= edac_stub.o
-obj-$(CONFIG_EDAC_MM_EDAC)		+= edac_core.o
+obj-$(CONFIG_EDAC)			:= edac_core.o
 
 edac_core-y	:= edac_mc.o edac_device.o edac_mc_sysfs.o
 edac_core-y	+= edac_module.o edac_device_sysfs.o wq.o
@@ -67,13 +66,14 @@ obj-$(CONFIG_EDAC_AMD8131)		+= amd8131_edac.o
 
 obj-$(CONFIG_EDAC_TILE)			+= tile_edac.o
 
-obj-$(CONFIG_EDAC_HIGHBANK_MC)	+= highbank_mc_edac.o
-obj-$(CONFIG_EDAC_HIGHBANK_L2)	+= highbank_l2_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_MC)		+= highbank_mc_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_L2)		+= highbank_l2_edac.o
 
 obj-$(CONFIG_EDAC_OCTEON_PC)		+= octeon_edac-pc.o
 obj-$(CONFIG_EDAC_OCTEON_L2C)		+= octeon_edac-l2c.o
 obj-$(CONFIG_EDAC_OCTEON_LMC)		+= octeon_edac-lmc.o
 obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
+obj-$(CONFIG_EDAC_THUNDERX)		+= thunderx_edac.o
 
 obj-$(CONFIG_EDAC_ALTERA)		+= altera_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index c5a5b91f37f0..7717b094fabb 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1023,13 +1023,23 @@ out:
 	return ret;
 }
 
+static int socfpga_is_a10(void)
+{
+	return of_machine_is_compatible("altr,socfpga-arria10");
+}
+
 static int validate_parent_available(struct device_node *np);
 static const struct of_device_id altr_edac_a10_device_of_match[];
 static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
 {
 	int irq;
-	struct device_node *child, *np = of_find_compatible_node(NULL, NULL,
-					"altr,socfpga-a10-ecc-manager");
+	struct device_node *child, *np;
+
+	if (!socfpga_is_a10())
+		return -ENODEV;
+
+	np = of_find_compatible_node(NULL, NULL,
+				     "altr,socfpga-a10-ecc-manager");
 	if (!np) {
 		edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n");
 		return -ENODEV;
@@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = {
 static int __init socfpga_init_sdmmc_ecc(void)
 {
 	int rc = -ENODEV;
-	struct device_node *child = of_find_compatible_node(NULL, NULL,
-						"altr,socfpga-sdmmc-ecc");
+	struct device_node *child;
+
+	if (!socfpga_is_a10())
+		return -ENODEV;
+
+	child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
 	if (!child) {
 		edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
 		return -ENODEV;
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index e5573c56b15e..480072139b7a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -40,6 +40,11 @@
 #define edac_atomic_scrub(va, size) do { } while (0)
 #endif
 
+int edac_op_state = EDAC_OPSTATE_INVAL;
+EXPORT_SYMBOL_GPL(edac_op_state);
+
+static int edac_report = EDAC_REPORTING_ENABLED;
+
 /* lock to memory controller's control array */
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
@@ -52,6 +57,65 @@ static void const *edac_mc_owner;
 
 static struct bus_type mc_bus[EDAC_MAX_MCS];
 
+int edac_get_report_status(void)
+{
+	return edac_report;
+}
+EXPORT_SYMBOL_GPL(edac_get_report_status);
+
+void edac_set_report_status(int new)
+{
+	if (new == EDAC_REPORTING_ENABLED ||
+	    new == EDAC_REPORTING_DISABLED ||
+	    new == EDAC_REPORTING_FORCE)
+		edac_report = new;
+}
+EXPORT_SYMBOL_GPL(edac_set_report_status);
+
+static int edac_report_set(const char *str, const struct kernel_param *kp)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strncmp(str, "on", 2))
+		edac_report = EDAC_REPORTING_ENABLED;
+	else if (!strncmp(str, "off", 3))
+		edac_report = EDAC_REPORTING_DISABLED;
+	else if (!strncmp(str, "force", 5))
+		edac_report = EDAC_REPORTING_FORCE;
+
+	return 0;
+}
+
+static int edac_report_get(char *buffer, const struct kernel_param *kp)
+{
+	int ret = 0;
+
+	switch (edac_report) {
+	case EDAC_REPORTING_ENABLED:
+		ret = sprintf(buffer, "on");
+		break;
+	case EDAC_REPORTING_DISABLED:
+		ret = sprintf(buffer, "off");
+		break;
+	case EDAC_REPORTING_FORCE:
+		ret = sprintf(buffer, "force");
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct kernel_param_ops edac_report_ops = {
+	.set = edac_report_set,
+	.get = edac_report_get,
+};
+
+module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -505,22 +569,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 EXPORT_SYMBOL_GPL(find_mci_by_dev);
 
 /*
- * handler for EDAC to check if NMI type handler has asserted interrupt
- */
-static int edac_mc_assert_error_check_and_clear(void)
-{
-	int old_state;
-
-	if (edac_op_state == EDAC_OPSTATE_POLL)
-		return 1;
-
-	old_state = edac_err_assert;
-	edac_err_assert = 0;
-
-	return old_state;
-}
-
-/*
  * edac_mc_workq_function
  *	performs the operation scheduled by a workq request
  */
@@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
 		return;
 	}
 
-	if (edac_mc_assert_error_check_and_clear())
+	if (edac_op_state == EDAC_OPSTATE_POLL)
 		mci->edac_check(mci);
 
 	mutex_unlock(&mem_ctls_mutex);
@@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
 	}
 
 	list_add_tail_rcu(&mci->link, insert_before);
-	atomic_inc(&edac_handlers);
 	return 0;
 
 fail0:
@@ -619,7 +666,6 @@ fail1:
 
 static int del_mc_from_global_list(struct mem_ctl_info *mci)
 {
-	int handlers = atomic_dec_return(&edac_handlers);
 	list_del_rcu(&mci->link);
 
 	/* these are for safe removal of devices from global list while
@@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci)
 	synchronize_rcu();
 	INIT_LIST_HEAD(&mci->link);
 
-	return handlers;
+	return list_empty(&mc_devices);
 }
 
 struct mem_ctl_info *edac_mc_find(int idx)
@@ -763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 	/* mark MCI offline: */
 	mci->op_state = OP_OFFLINE;
 
-	if (!del_mc_from_global_list(mci))
+	if (del_mc_from_global_list(mci))
 		edac_mc_owner = NULL;
 
 	mutex_unlock(&mem_ctls_mutex);
@@ -1195,10 +1241,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 
 	/* Report the error via the trace interface */
 	grain_bits = fls_long(e->grain) + 1;
-	trace_mc_event(type, e->msg, e->label, e->error_count,
-		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
-		       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
-		       grain_bits, e->syndrome, e->other_detail);
+
+	if (IS_ENABLED(CONFIG_RAS))
+		trace_mc_event(type, e->msg, e->label, e->error_count,
+			       mci->mc_idx, e->top_layer, e->mid_layer,
+			       e->low_layer,
+			       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
+			       grain_bits, e->syndrome, e->other_detail);
 
 	edac_raw_mc_handle_error(type, mci, e);
 }
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
deleted file mode 100644
index 952e411f01f2..000000000000
--- a/drivers/edac/edac_stub.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * common EDAC components that must be in kernel
- *
- * Author: Dave Jiang <djiang@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc.
- * 2010 (c) Advanced Micro Devices Inc.
- *	    Borislav Petkov <bp@alien8.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- *
- */
-#include <linux/module.h>
-#include <linux/edac.h>
-#include <linux/atomic.h>
-#include <linux/device.h>
-
-int edac_op_state = EDAC_OPSTATE_INVAL;
-EXPORT_SYMBOL_GPL(edac_op_state);
-
-atomic_t edac_handlers = ATOMIC_INIT(0);
-EXPORT_SYMBOL_GPL(edac_handlers);
-
-int edac_err_assert = 0;
-EXPORT_SYMBOL_GPL(edac_err_assert);
-
-int edac_report_status = EDAC_REPORTING_ENABLED;
-EXPORT_SYMBOL_GPL(edac_report_status);
-
-static int __init edac_report_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (!strncmp(str, "on", 2))
-		set_edac_report_status(EDAC_REPORTING_ENABLED);
-	else if (!strncmp(str, "off", 3))
-		set_edac_report_status(EDAC_REPORTING_DISABLED);
-	else if (!strncmp(str, "force", 5))
-		set_edac_report_status(EDAC_REPORTING_FORCE);
-
-	return 0;
-}
-__setup("edac_report=", edac_report_setup);
-
-/*
- * called to determine if there is an EDAC driver interested in
- * knowing an event (such as NMI) occurred
- */
-int edac_handler_set(void)
-{
-	if (edac_op_state == EDAC_OPSTATE_POLL)
-		return 0;
-
-	return atomic_read(&edac_handlers);
-}
-EXPORT_SYMBOL_GPL(edac_handler_set);
-
-/*
- * handler for NMI type of interrupts to assert error
- */
-void edac_atomic_assert_error(void)
-{
-	edac_err_assert++;
-}
-EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 928e0dba41fc..1cad5a9af8d0 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 	struct dram_addr daddr;
 	char *type;
 
-	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
 	mci = pnd2_mci;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index a65ea44e3b0b..ea21cb651b3c 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct sbridge_pvt *pvt;
 	char *type;
 
-	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
 	mci = get_mci_for_node_id(mce->socketid);
@@ -3441,7 +3441,7 @@ static int __init sbridge_init(void)
 
 	if (rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
-		if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+		if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
 		return 0;
 	}
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
index 1159dba4671f..64bef6c9cfb4 100644
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 	char *type;
 
-	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
 	/* ignore unless this is memory related with an address */
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
new file mode 100644
index 000000000000..86d585cb6d32
--- /dev/null
+++ b/drivers/edac/thunderx_edac.c
@@ -0,0 +1,2174 @@
+/*
+ * Cavium ThunderX memory controller kernel module
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/stop_machine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <asm/page.h>
+
+#include "edac_module.h"
+
+#define phys_to_pfn(phys)	(PFN_DOWN(phys))
+
+#define THUNDERX_NODE		GENMASK(45, 44)
+
+enum {
+	ERR_CORRECTED	= 1,
+	ERR_UNCORRECTED	= 2,
+	ERR_UNKNOWN	= 3,
+};
+
+#define MAX_SYNDROME_REGS 4
+
+struct error_syndrome {
+	u64 reg[MAX_SYNDROME_REGS];
+};
+
+struct error_descr {
+	int	type;
+	u64	mask;
+	char	*descr;
+};
+
+static void decode_register(char *str, size_t size,
+			   const struct error_descr *descr,
+			   const uint64_t reg)
+{
+	int ret = 0;
+
+	while (descr->type && descr->mask && descr->descr) {
+		if (reg & descr->mask) {
+			ret = snprintf(str, size, "\n\t%s, %s",
+				       descr->type == ERR_CORRECTED ?
+					 "Corrected" : "Uncorrected",
+				       descr->descr);
+			str += ret;
+			size -= ret;
+		}
+		descr++;
+	}
+}
+
+static unsigned long get_bits(unsigned long data, int pos, int width)
+{
+	return (data >> pos) & ((1 << width) - 1);
+}
+
+#define L2C_CTL			0x87E080800000
+#define L2C_CTL_DISIDXALIAS	BIT(0)
+
+#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
+
+#define LMC_FADR		0x20
+#define LMC_FADR_FDIMM(x)	((x >> 37) & 0x1)
+#define LMC_FADR_FBUNK(x)	((x >> 36) & 0x1)
+#define LMC_FADR_FBANK(x)	((x >> 32) & 0xf)
+#define LMC_FADR_FROW(x)	((x >> 14) & 0xffff)
+#define LMC_FADR_FCOL(x)	((x >> 0) & 0x1fff)
+
+#define LMC_NXM_FADR		0x28
+#define LMC_ECC_SYND		0x38
+
+#define LMC_ECC_PARITY_TEST	0x108
+
+#define LMC_INT_W1S		0x150
+
+#define LMC_INT_ENA_W1C		0x158
+#define LMC_INT_ENA_W1S		0x160
+
+#define LMC_CONFIG		0x188
+
+#define LMC_CONFIG_BG2		BIT(62)
+#define LMC_CONFIG_RANK_ENA	BIT(42)
+#define LMC_CONFIG_PBANK_LSB(x)	(((x) >> 5) & 0xF)
+#define LMC_CONFIG_ROW_LSB(x)	(((x) >> 2) & 0x7)
+
+#define LMC_CONTROL		0x190
+#define LMC_CONTROL_XOR_BANK	BIT(16)
+
+#define LMC_INT			0x1F0
+
+#define LMC_INT_DDR_ERR		BIT(11)
+#define LMC_INT_DED_ERR		(0xFUL << 5)
+#define LMC_INT_SEC_ERR         (0xFUL << 1)
+#define LMC_INT_NXM_WR_MASK	BIT(0)
+
+#define LMC_DDR_PLL_CTL		0x258
+#define LMC_DDR_PLL_CTL_DDR4	BIT(29)
+
+#define LMC_FADR_SCRAMBLED	0x330
+
+#define LMC_INT_UE              (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
+				 LMC_INT_NXM_WR_MASK)
+
+#define LMC_INT_CE		(LMC_INT_SEC_ERR)
+
+static const struct error_descr lmc_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = LMC_INT_SEC_ERR,
+		.descr = "Single-bit ECC error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = LMC_INT_DDR_ERR,
+		.descr = "DDR chip error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = LMC_INT_DED_ERR,
+		.descr = "Double-bit ECC error",
+	},
+	{
+		.type = ERR_UNCORRECTED,
+		.mask = LMC_INT_NXM_WR_MASK,
+		.descr = "Non-existent memory write",
+	},
+	{0, 0, NULL},
+};
+
+#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
+#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
+#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
+#define LMC_INT_INTR_DED_ENA		BIT(2)
+#define LMC_INT_INTR_SEC_ENA		BIT(1)
+#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)
+
+#define LMC_INT_ENA_ALL			GENMASK(5, 0)
+
+#define LMC_DDR_PLL_CTL		0x258
+#define LMC_DDR_PLL_CTL_DDR4	BIT(29)
+
+#define LMC_CONTROL		0x190
+#define LMC_CONTROL_RDIMM	BIT(0)
+
+#define LMC_SCRAM_FADR		0x330
+
+#define LMC_CHAR_MASK0		0x228
+#define LMC_CHAR_MASK2		0x238
+
+#define RING_ENTRIES	8
+
+struct debugfs_entry {
+	const char *name;
+	umode_t mode;
+	const struct file_operations fops;
+};
+
+struct lmc_err_ctx {
+	u64 reg_int;
+	u64 reg_fadr;
+	u64 reg_nxm_fadr;
+	u64 reg_scram_fadr;
+	u64 reg_ecc_synd;
+};
+
+struct thunderx_lmc {
+	void __iomem *regs;
+	struct pci_dev *pdev;
+	struct msix_entry msix_ent;
+
+	atomic_t ecc_int;
+
+	u64 mask0;
+	u64 mask2;
+	u64 parity_test;
+	u64 node;
+
+	int xbits;
+	int bank_width;
+	int pbank_lsb;
+	int dimm_lsb;
+	int rank_lsb;
+	int bank_lsb;
+	int row_lsb;
+	int col_hi_lsb;
+
+	int xor_bank;
+	int l2c_alias;
+
+	struct page *mem;
+
+	struct lmc_err_ctx err_ctx[RING_ENTRIES];
+	unsigned long ring_head;
+	unsigned long ring_tail;
+};
+
+#define ring_pos(pos, size) ((pos) & (size - 1))
+
+#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
+static struct debugfs_entry debugfs_##_name = {				    \
+	.name = __stringify(_name),					    \
+	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
+	.fops = {							    \
+		.open = simple_open,					    \
+		.write = _write,					    \
+		.read  = _read,						    \
+		.llseek = generic_file_llseek,				    \
+	},								    \
+}
+
+#define DEBUGFS_FIELD_ATTR(_type, _field)				    \
+static ssize_t thunderx_##_type##_##_field##_read(struct file *file,	    \
+					    char __user *data,		    \
+					    size_t count, loff_t *ppos)	    \
+{									    \
+	struct thunderx_##_type *pdata = file->private_data;		    \
+	char buf[20];							    \
+									    \
+	snprintf(buf, count, "0x%016llx", pdata->_field);		    \
+	return simple_read_from_buffer(data, count, ppos,		    \
+				       buf, sizeof(buf));		    \
+}									    \
+									    \
+static ssize_t thunderx_##_type##_##_field##_write(struct file *file,	    \
+					     const char __user *data,	    \
+					     size_t count, loff_t *ppos)    \
+{									    \
+	struct thunderx_##_type *pdata = file->private_data;		    \
+	int res;							    \
+									    \
+	res = kstrtoull_from_user(data, count, 0, &pdata->_field);	    \
+									    \
+	return res ? res : count;					    \
+}									    \
+									    \
+DEBUGFS_STRUCT(_field, 0600,						    \
+		   thunderx_##_type##_##_field##_write,			    \
+		   thunderx_##_type##_##_field##_read)			    \
+
+#define DEBUGFS_REG_ATTR(_type, _name, _reg)				    \
+static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	    \
+					   char __user *data,		    \
+					   size_t count, loff_t *ppos)      \
+{									    \
+	struct thunderx_##_type *pdata = file->private_data;		    \
+	char buf[20];							    \
+									    \
+	sprintf(buf, "0x%016llx", readq(pdata->regs + _reg));		    \
+	return simple_read_from_buffer(data, count, ppos,		    \
+				       buf, sizeof(buf));		    \
+}									    \
+									    \
+static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	    \
+					    const char __user *data,	    \
+					    size_t count, loff_t *ppos)     \
+{									    \
+	struct thunderx_##_type *pdata = file->private_data;		    \
+	u64 val;							    \
+	int res;							    \
+									    \
+	res = kstrtoull_from_user(data, count, 0, &val);		    \
+									    \
+	if (!res) {							    \
+		writeq(val, pdata->regs + _reg);			    \
+		res = count;						    \
+	}								    \
+									    \
+	return res;							    \
+}									    \
+									    \
+DEBUGFS_STRUCT(_name, 0600,						    \
+	       thunderx_##_type##_##_name##_write,			    \
+	       thunderx_##_type##_##_name##_read)
+
+#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
+
+/*
+ * To get an ECC error injected, the following steps are needed:
+ * - Setup the ECC injection by writing the appropriate parameters:
+ *	echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
+ *	echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
+ *	echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
+ * - Do the actual injection:
+ *	echo 1 > /sys/kernel/debug/<device number>/inject_ecc
+ */
+static ssize_t thunderx_lmc_inject_int_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	u64 val;
+	int res;
+
+	res = kstrtoull_from_user(data, count, 0, &val);
+
+	if (!res) {
+		/* Trigger the interrupt */
+		writeq(val, lmc->regs + LMC_INT_W1S);
+		res = count;
+	}
+
+	return res;
+}
+
+static ssize_t thunderx_lmc_int_read(struct file *file,
+				     char __user *data,
+				     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+	char buf[20];
+	u64 lmc_int = readq(lmc->regs + LMC_INT);
+
+	snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
+	return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
+}
+
+#define TEST_PATTERN 0xa5
+
+static int inject_ecc_fn(void *arg)
+{
+	struct thunderx_lmc *lmc = arg;
+	uintptr_t addr, phys;
+	unsigned int cline_size = cache_line_size();
+	const unsigned int lines = PAGE_SIZE / cline_size;
+	unsigned int i, cl_idx;
+
+	addr = (uintptr_t)page_address(lmc->mem);
+	phys = (uintptr_t)page_to_phys(lmc->mem);
+
+	cl_idx = (phys & 0x7f) >> 4;
+	lmc->parity_test &= ~(7ULL << 8);
+	lmc->parity_test |= (cl_idx << 8);
+
+	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
+	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
+	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
+
+	readq(lmc->regs + LMC_CHAR_MASK0);
+	readq(lmc->regs + LMC_CHAR_MASK2);
+	readq(lmc->regs + LMC_ECC_PARITY_TEST);
+
+	for (i = 0; i < lines; i++) {
+		memset((void *)addr, TEST_PATTERN, cline_size);
+		barrier();
+
+		/*
+		 * Flush L1 cachelines to the PoC (L2).
+		 * This will cause cacheline eviction to the L2.
+		 */
+		asm volatile("dc civac, %0\n"
+			     "dsb sy\n"
+			     : : "r"(addr + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Flush L2 cachelines to the DRAM.
+		 * This will cause cacheline eviction to the DRAM
+		 * and ECC corruption according to the masks set.
+		 */
+		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
+				 : : "r"(phys + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Invalidate L2 cachelines.
+		 * The subsequent load will cause cacheline fetch
+		 * from the DRAM and an error interrupt
+		 */
+		__asm__ volatile("sys #0,c11,C1,#1, %0"
+				 : : "r"(phys + i * cline_size));
+	}
+
+	for (i = 0; i < lines; i++) {
+		/*
+		 * Invalidate L1 cachelines.
+		 * The subsequent load will cause cacheline fetch
+		 * from the L2 and/or DRAM
+		 */
+		asm volatile("dc ivac, %0\n"
+			     "dsb sy\n"
+			     : : "r"(addr + i * cline_size));
+	}
+
+	return 0;
+}
+
+static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct thunderx_lmc *lmc = file->private_data;
+
+	unsigned int cline_size = cache_line_size();
+
+	u8 tmp[cline_size];
+	void __iomem *addr;
+	unsigned int offs, timeout = 100000;
+
+	atomic_set(&lmc->ecc_int, 0);
+
+	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
+
+	if (!lmc->mem)
+		return -ENOMEM;
+
+	addr = page_address(lmc->mem);
+
+	while (!atomic_read(&lmc->ecc_int) && timeout--) {
+		stop_machine(inject_ecc_fn, lmc, NULL);
+
+		for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) {
+			/*
+			 * Do a load from the previously rigged location
+			 * This should generate an error interrupt.
+			 */
+			memcpy(tmp, addr + offs, cline_size);
+			asm volatile("dsb ld\n");
+		}
+	}
+
+	__free_pages(lmc->mem, 0);
+
+	return count;
+}
+
+LMC_DEBUGFS_ENT(mask0);
+LMC_DEBUGFS_ENT(mask2);
+LMC_DEBUGFS_ENT(parity_test);
+
+DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
+DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
+DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
+
+struct debugfs_entry *lmc_dfs_ents[] = {
+	&debugfs_mask0,
+	&debugfs_mask2,
+	&debugfs_parity_test,
+	&debugfs_inject_ecc,
+	&debugfs_inject_int,
+	&debugfs_int_w1c,
+};
+
+static int thunderx_create_debugfs_nodes(struct dentry *parent,
+					  struct debugfs_entry *attrs[],
+					  void *data,
+					  size_t num)
+{
+	int i;
+	struct dentry *ent;
+
+	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
+		return 0;
+
+	if (!parent)
+		return -ENOENT;
+
+	for (i = 0; i < num; i++) {
+		ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
+					       parent, data, &attrs[i]->fops);
+
+		if (!ent)
+			break;
+	}
+
+	return i;
+}
+
+static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
+{
+	phys_addr_t addr = 0;
+	int bank, xbits;
+
+	addr |= lmc->node << 40;
+	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
+	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
+	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
+	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
+
+	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
+
+	if (lmc->xor_bank)
+		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
+
+	addr |= bank << lmc->bank_lsb;
+
+	xbits = PCI_FUNC(lmc->pdev->devfn);
+
+	if (lmc->l2c_alias)
+		xbits ^= get_bits(addr, 20, lmc->xbits) ^
+			 get_bits(addr, 12, lmc->xbits);
+
+	addr |= xbits << 7;
+
+	return addr;
+}
+
+static unsigned int thunderx_get_num_lmcs(unsigned int node)
+{
+	unsigned int number = 0;
+	struct pci_dev *pdev = NULL;
+
+	do {
+		pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+				      PCI_DEVICE_ID_THUNDER_LMC,
+				      pdev);
+		if (pdev) {
+#ifdef CONFIG_NUMA
+			if (pdev->dev.numa_node == node)
+				number++;
+#else
+			number++;
+#endif
+		}
+	} while (pdev);
+
+	return number;
+}
+
+#define LMC_MESSAGE_SIZE	120
+#define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))
+
+static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct thunderx_lmc *lmc = mci->pvt_info;
+
+	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
+	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
+
+	writeq(0, lmc->regs + LMC_CHAR_MASK0);
+	writeq(0, lmc->regs + LMC_CHAR_MASK2);
+	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
+
+	ctx->reg_int = readq(lmc->regs + LMC_INT);
+	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
+	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
+	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
+	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
+
+	lmc->ring_head++;
+
+	atomic_set(&lmc->ecc_int, 1);
+
+	/* Clear the interrupt */
+	writeq(ctx->reg_int, lmc->regs + LMC_INT);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct thunderx_lmc *lmc = mci->pvt_info;
+	phys_addr_t phys_addr;
+
+	unsigned long tail;
+	struct lmc_err_ctx *ctx;
+
+	irqreturn_t ret = IRQ_NONE;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
+	other =  kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
+		ARRAY_SIZE(lmc->err_ctx))) {
+		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
+
+		ctx = &lmc->err_ctx[tail];
+
+		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
+			ctx->reg_int);
+		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
+			ctx->reg_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
+			ctx->reg_nxm_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
+			ctx->reg_scram_fadr);
+		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
+			ctx->reg_ecc_synd);
+
+		snprintf(msg, LMC_MESSAGE_SIZE,
+			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
+			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
+			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
+			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
+			 LMC_FADR_FROW(ctx->reg_scram_fadr),
+			 LMC_FADR_FCOL(ctx->reg_scram_fadr));
+
+		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
+				ctx->reg_int);
+
+		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
+
+		if (ctx->reg_int & LMC_INT_UE)
+			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+					     phys_to_pfn(phys_addr),
+					     offset_in_page(phys_addr),
+					     0, -1, -1, -1, msg, other);
+		else if (ctx->reg_int & LMC_INT_CE)
+			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+					     phys_to_pfn(phys_addr),
+					     offset_in_page(phys_addr),
+					     0, -1, -1, -1, msg, other);
+
+		lmc->ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+
+err_free:
+	kfree(msg);
+	kfree(other);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int thunderx_lmc_resume(struct pci_dev *pdev)
+{
+	pci_set_power_state(pdev, PCI_D0);
+	pci_enable_wake(pdev, PCI_D0, 0);
+	pci_restore_state(pdev);
+
+	return 0;
+}
+#endif
+
+static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
+	{ 0, },
+};
+
+static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
+{
+	int node = dev_to_node(&pdev->dev);
+	int ret = PCI_FUNC(pdev->devfn);
+
+	ret += max(node, 0) << 3;
+
+	return ret;
+}
+
+static int thunderx_lmc_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	struct thunderx_lmc *lmc;
+	struct edac_mc_layer layer;
+	struct mem_ctl_info *mci;
+	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
+	int ret;
+	u64 lmc_int;
+	void *l2c_ioaddr;
+
+	layer.type = EDAC_MC_LAYER_SLOT;
+	layer.size = 2;
+	layer.is_virt_csrow = false;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		return ret;
+	}
+
+	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
+			    sizeof(struct thunderx_lmc));
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	lmc = mci->pvt_info;
+
+	pci_set_drvdata(pdev, mci);
+
+	lmc->regs = pcim_iomap_table(pdev)[0];
+
+	lmc_control = readq(lmc->regs + LMC_CONTROL);
+	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
+	lmc_config = readq(lmc->regs + LMC_CONFIG);
+
+	if (lmc_control & LMC_CONTROL_RDIMM) {
+		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+					   lmc_ddr_pll_ctl) ?
+				MEM_RDDR4 : MEM_RDDR3;
+	} else {
+		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+					   lmc_ddr_pll_ctl) ?
+				MEM_DDR4 : MEM_DDR3;
+	}
+
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = "thunderx-lmc";
+	mci->mod_ver = "1";
+	mci->ctl_name = "thunderx-lmc";
+	mci->dev_name = dev_name(&pdev->dev);
+	mci->scrub_mode = SCRUB_NONE;
+
+	lmc->pdev = pdev;
+	lmc->msix_ent.entry = 0;
+
+	lmc->ring_head = 0;
+	lmc->ring_tail = 0;
+
+	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+		goto err_free;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
+					thunderx_lmc_err_isr,
+					thunderx_lmc_threaded_isr, 0,
+					"[EDAC] ThunderX LMC", mci);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
+		goto err_free;
+	}
+
+	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
+
+	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
+	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
+			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
+
+	lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
+	lmc->dimm_lsb  = 28 + lmc->pbank_lsb + lmc->xbits;
+	lmc->rank_lsb = lmc->dimm_lsb;
+	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
+	lmc->bank_lsb = 7 + lmc->xbits;
+	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
+
+	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
+
+	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
+
+	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node),
+			     PAGE_SIZE);
+
+	if (!l2c_ioaddr) {
+		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
+		goto err_free;
+	}
+
+	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
+
+	iounmap(l2c_ioaddr);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
+		goto err_free;
+	}
+
+	lmc_int = readq(lmc->regs + LMC_INT);
+	writeq(lmc_int, lmc->regs + LMC_INT);
+
+	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+		ret = thunderx_create_debugfs_nodes(mci->debugfs,
+						    lmc_dfs_ents,
+						    lmc,
+						    ARRAY_SIZE(lmc_dfs_ents));
+
+		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
+			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+				 ret, ret >= 0 ? " created" : "");
+		}
+	}
+
+	return 0;
+
+err_free:
+	pci_set_drvdata(pdev, NULL);
+	edac_mc_free(mci);
+
+	return ret;
+}
+
+static void thunderx_lmc_remove(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
+	struct thunderx_lmc *lmc = mci->pvt_info;
+
+	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
+
+static struct pci_driver thunderx_lmc_driver = {
+	.name     = "thunderx_lmc_edac",
+	.probe    = thunderx_lmc_probe,
+	.remove   = thunderx_lmc_remove,
+#ifdef CONFIG_PM
+	.suspend  = thunderx_lmc_suspend,
+	.resume   = thunderx_lmc_resume,
+#endif
+	.id_table = thunderx_lmc_pci_tbl,
+};
+
+/*---------------------- OCX driver ---------------------------------*/
+
+#define PCI_DEVICE_ID_THUNDER_OCX 0xa013
+
+#define OCX_LINK_INTS		3
+#define OCX_INTS		(OCX_LINK_INTS + 1)
+#define OCX_RX_LANES		24
+#define OCX_RX_LANE_STATS	15
+
+#define OCX_COM_INT		0x100
+#define OCX_COM_INT_W1S		0x108
+#define OCX_COM_INT_ENA_W1S	0x110
+#define OCX_COM_INT_ENA_W1C	0x118
+
+#define OCX_COM_IO_BADID		BIT(54)
+#define OCX_COM_MEM_BADID		BIT(53)
+#define OCX_COM_COPR_BADID		BIT(52)
+#define OCX_COM_WIN_REQ_BADID		BIT(51)
+#define OCX_COM_WIN_REQ_TOUT		BIT(50)
+#define OCX_COM_RX_LANE			GENMASK(23, 0)
+
+#define OCX_COM_INT_CE			(OCX_COM_IO_BADID      | \
+					 OCX_COM_MEM_BADID     | \
+					 OCX_COM_COPR_BADID    | \
+					 OCX_COM_WIN_REQ_BADID | \
+					 OCX_COM_WIN_REQ_TOUT)
+
+static const struct error_descr ocx_com_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_IO_BADID,
+		.descr = "Invalid IO transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_MEM_BADID,
+		.descr = "Invalid memory transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_COPR_BADID,
+		.descr = "Invalid coprocessor transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_WIN_REQ_BADID,
+		.descr = "Invalid SLI transaction node ID",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_WIN_REQ_TOUT,
+		.descr = "Window/core request timeout",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
+#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)
+
+#define OCX_COM_LINK_BAD_WORD			BIT(13)
+#define OCX_COM_LINK_ALIGN_FAIL			BIT(12)
+#define OCX_COM_LINK_ALIGN_DONE			BIT(11)
+#define OCX_COM_LINK_UP				BIT(10)
+#define OCX_COM_LINK_STOP			BIT(9)
+#define OCX_COM_LINK_BLK_ERR			BIT(8)
+#define OCX_COM_LINK_REINIT			BIT(7)
+#define OCX_COM_LINK_LNK_DATA			BIT(6)
+#define OCX_COM_LINK_RXFIFO_DBE			BIT(5)
+#define OCX_COM_LINK_RXFIFO_SBE			BIT(4)
+#define OCX_COM_LINK_TXFIFO_DBE			BIT(3)
+#define OCX_COM_LINK_TXFIFO_SBE			BIT(2)
+#define OCX_COM_LINK_REPLAY_DBE			BIT(1)
+#define OCX_COM_LINK_REPLAY_SBE			BIT(0)
+
+static const struct error_descr ocx_com_link_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_REPLAY_SBE,
+		.descr = "Replay buffer single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_TXFIFO_SBE,
+		.descr = "TX FIFO single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_RXFIFO_SBE,
+		.descr = "RX FIFO single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_BLK_ERR,
+		.descr = "Block code error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_ALIGN_FAIL,
+		.descr = "Link alignment failure",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_COM_LINK_BAD_WORD,
+		.descr = "Bad code word",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_REPLAY_DBE,
+		.descr = "Replay buffer double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_TXFIFO_DBE,
+		.descr = "TX FIFO double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_RXFIFO_DBE,
+		.descr = "RX FIFO double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = OCX_COM_LINK_STOP,
+		.descr = "Link stopped",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_COM_LINK_INT_UE       (OCX_COM_LINK_REPLAY_DBE | \
+				   OCX_COM_LINK_TXFIFO_DBE | \
+				   OCX_COM_LINK_RXFIFO_DBE | \
+				   OCX_COM_LINK_STOP)
+
+#define OCX_COM_LINK_INT_CE       (OCX_COM_LINK_REPLAY_SBE | \
+				   OCX_COM_LINK_TXFIFO_SBE | \
+				   OCX_COM_LINK_RXFIFO_SBE | \
+				   OCX_COM_LINK_BLK_ERR    | \
+				   OCX_COM_LINK_ALIGN_FAIL | \
+				   OCX_COM_LINK_BAD_WORD)
+
+#define OCX_LNE_INT(x)			(0x8018 + (x) * 0x100)
+#define OCX_LNE_INT_EN(x)		(0x8020 + (x) * 0x100)
+#define OCX_LNE_BAD_CNT(x)		(0x8028 + (x) * 0x100)
+#define OCX_LNE_CFG(x)			(0x8000 + (x) * 0x100)
+#define OCX_LNE_STAT(x, y)		(0x8040 + (x) * 0x100 + (y) * 8)
+
+#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS		BIT(8)
+#define OCX_LNE_CFG_RX_STAT_WRAP_DIS		BIT(2)
+#define OCX_LNE_CFG_RX_STAT_RDCLR		BIT(1)
+#define OCX_LNE_CFG_RX_STAT_ENA			BIT(0)
+
+
+#define OCX_LANE_BAD_64B67B			BIT(8)
+#define OCX_LANE_DSKEW_FIFO_OVFL		BIT(5)
+#define OCX_LANE_SCRM_SYNC_LOSS			BIT(4)
+#define OCX_LANE_UKWN_CNTL_WORD			BIT(3)
+#define OCX_LANE_CRC32_ERR			BIT(2)
+#define OCX_LANE_BDRY_SYNC_LOSS			BIT(1)
+#define OCX_LANE_SERDES_LOCK_LOSS		BIT(0)
+
+#define OCX_COM_LANE_INT_UE       (0)
+#define OCX_COM_LANE_INT_CE       (OCX_LANE_SERDES_LOCK_LOSS | \
+				   OCX_LANE_BDRY_SYNC_LOSS   | \
+				   OCX_LANE_CRC32_ERR        | \
+				   OCX_LANE_UKWN_CNTL_WORD   | \
+				   OCX_LANE_SCRM_SYNC_LOSS   | \
+				   OCX_LANE_DSKEW_FIFO_OVFL  | \
+				   OCX_LANE_BAD_64B67B)
+
+static const struct error_descr ocx_lane_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
+		.descr = "RX SerDes lock lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
+		.descr = "RX word boundary lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_CRC32_ERR,
+		.descr = "CRC32 error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_UKWN_CNTL_WORD,
+		.descr = "Unknown control word",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
+		.descr = "Scrambler synchronization lost",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
+		.descr = "RX deskew FIFO overflow",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = OCX_LANE_BAD_64B67B,
+		.descr = "Bad 64B/67B codeword",
+	},
+	{0, 0, NULL},
+};
+
+#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
+#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
+#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
+					 GENMASK(9, 7) | GENMASK(5, 0))
+
+#define OCX_TLKX_ECC_CTL(x)		(0x10018 + (x) * 0x2000)
+#define OCX_RLKX_ECC_CTL(x)		(0x18018 + (x) * 0x2000)
+
+struct ocx_com_err_ctx {
+	u64 reg_com_int;
+	u64 reg_lane_int[OCX_RX_LANES];
+	u64 reg_lane_stat11[OCX_RX_LANES];
+};
+
+struct ocx_link_err_ctx {
+	u64 reg_com_link_int;
+	int link;
+};
+
+struct thunderx_ocx {
+	void __iomem *regs;
+	int com_link;
+	struct pci_dev *pdev;
+	struct edac_device_ctl_info *edac_dev;
+
+	struct dentry *debugfs;
+	struct msix_entry msix_ent[OCX_INTS];
+
+	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
+	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
+
+	unsigned long com_ring_head;
+	unsigned long com_ring_tail;
+
+	unsigned long link_ring_head;
+	unsigned long link_ring_tail;
+};
+
+#define OCX_MESSAGE_SIZE	SZ_1K
+#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
+
+/* This handler is threaded */
+static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+
+	int lane;
+	unsigned long head = ring_pos(ocx->com_ring_head,
+				      ARRAY_SIZE(ocx->com_err_ctx));
+	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
+
+	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
+
+	for (lane = 0; lane < OCX_RX_LANES; lane++) {
+		ctx->reg_lane_int[lane] =
+			readq(ocx->regs + OCX_LNE_INT(lane));
+		ctx->reg_lane_stat11[lane] =
+			readq(ocx->regs + OCX_LNE_STAT(lane, 11));
+
+		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
+	}
+
+	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
+
+	ocx->com_ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+
+	irqreturn_t ret = IRQ_NONE;
+
+	unsigned long tail;
+	struct ocx_com_err_ctx *ctx;
+	int lane;
+	char *msg;
+	char *other;
+
+	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
+			ARRAY_SIZE(ocx->com_err_ctx))) {
+		tail = ring_pos(ocx->com_ring_tail,
+				ARRAY_SIZE(ocx->com_err_ctx));
+		ctx = &ocx->com_err_ctx[tail];
+
+		snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
+			ocx->edac_dev->ctl_name, ctx->reg_com_int);
+
+		decode_register(other, OCX_OTHER_SIZE,
+				ocx_com_errors, ctx->reg_com_int);
+
+		strncat(msg, other, OCX_MESSAGE_SIZE);
+
+		for (lane = 0; lane < OCX_RX_LANES; lane++)
+			if (ctx->reg_com_int & BIT(lane)) {
+				snprintf(other, OCX_OTHER_SIZE,
+					 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
+					 lane, ctx->reg_lane_int[lane],
+					 lane, ctx->reg_lane_stat11[lane]);
+
+				strncat(msg, other, OCX_MESSAGE_SIZE);
+
+				decode_register(other, OCX_OTHER_SIZE,
+						ocx_lane_errors,
+						ctx->reg_lane_int[lane]);
+				strncat(msg, other, OCX_MESSAGE_SIZE);
+			}
+
+		if (ctx->reg_com_int & OCX_COM_INT_CE)
+			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+		ocx->com_ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+
+err_free:
+	kfree(other);
+	kfree(msg);
+
+	return ret;
+}
+
+static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+	unsigned long head = ring_pos(ocx->link_ring_head,
+				      ARRAY_SIZE(ocx->link_err_ctx));
+	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
+
+	ctx->link = msix->entry;
+	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
+
+	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
+
+	ocx->link_ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+						msix_ent[msix->entry]);
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long tail;
+	struct ocx_link_err_ctx *ctx;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
+			ARRAY_SIZE(ocx->link_err_ctx))) {
+		tail = ring_pos(ocx->link_ring_head,
+				ARRAY_SIZE(ocx->link_err_ctx));
+
+		ctx = &ocx->link_err_ctx[tail];
+
+		snprintf(msg, OCX_MESSAGE_SIZE,
+			 "%s: OCX_COM_LINK_INT[%d]: %016llx",
+			 ocx->edac_dev->ctl_name,
+			 ctx->link, ctx->reg_com_link_int);
+
+		decode_register(other, OCX_OTHER_SIZE,
+				ocx_com_link_errors, ctx->reg_com_link_int);
+
+		strncat(msg, other, OCX_MESSAGE_SIZE);
+
+		if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
+			edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
+		else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
+			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
+
+		ocx->link_ring_tail++;
+	}
+
+	ret = IRQ_HANDLED;
+err_free:
+	kfree(other);
+	kfree(msg);
+
+	return ret;
+}
+
+#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)
+
+OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));
+
+OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
+OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
+OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));
+
+OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
+OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
+OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));
+
+OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
+OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
+OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
+OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
+OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
+OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
+OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
+OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
+
+OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
+OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
+OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
+OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
+OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
+OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
+OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
+OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
+
+OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
+OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
+OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
+OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
+OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
+OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
+OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
+OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));
+
+OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
+
+struct debugfs_entry *ocx_dfs_ents[] = {
+	&debugfs_tlk0_ecc_ctl,
+	&debugfs_tlk1_ecc_ctl,
+	&debugfs_tlk2_ecc_ctl,
+
+	&debugfs_rlk0_ecc_ctl,
+	&debugfs_rlk1_ecc_ctl,
+	&debugfs_rlk2_ecc_ctl,
+
+	&debugfs_com_link0_int,
+	&debugfs_com_link1_int,
+	&debugfs_com_link2_int,
+
+	&debugfs_lne00_badcnt,
+	&debugfs_lne01_badcnt,
+	&debugfs_lne02_badcnt,
+	&debugfs_lne03_badcnt,
+	&debugfs_lne04_badcnt,
+	&debugfs_lne05_badcnt,
+	&debugfs_lne06_badcnt,
+	&debugfs_lne07_badcnt,
+	&debugfs_lne08_badcnt,
+	&debugfs_lne09_badcnt,
+	&debugfs_lne10_badcnt,
+	&debugfs_lne11_badcnt,
+	&debugfs_lne12_badcnt,
+	&debugfs_lne13_badcnt,
+	&debugfs_lne14_badcnt,
+	&debugfs_lne15_badcnt,
+	&debugfs_lne16_badcnt,
+	&debugfs_lne17_badcnt,
+	&debugfs_lne18_badcnt,
+	&debugfs_lne19_badcnt,
+	&debugfs_lne20_badcnt,
+	&debugfs_lne21_badcnt,
+	&debugfs_lne22_badcnt,
+	&debugfs_lne23_badcnt,
+
+	&debugfs_com_int,
+};
+
+static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
+	{ 0, },
+};
+
+static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
+{
+	int lane, stat, cfg;
+
+	for (lane = 0; lane < OCX_RX_LANES; lane++) {
+		cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
+		cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
+		cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
+		writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
+
+		for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
+			readq(ocx->regs + OCX_LNE_STAT(lane, stat));
+	}
+}
+
+static int thunderx_ocx_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	struct thunderx_ocx *ocx;
+	struct edac_device_ctl_info *edac_dev;
+	char name[32];
+	int idx;
+	int i;
+	int ret;
+	u64 reg;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		return ret;
+	}
+
+	idx = edac_device_alloc_index();
+	snprintf(name, sizeof(name), "OCX%d", idx);
+	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
+					      name, 1, "CCPI", 1,
+					      0, NULL, 0, idx);
+	if (!edac_dev) {
+		dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
+		return -ENOMEM;
+	}
+	ocx = edac_dev->pvt_info;
+	ocx->edac_dev = edac_dev;
+	ocx->com_ring_head = 0;
+	ocx->com_ring_tail = 0;
+	ocx->link_ring_head = 0;
+	ocx->link_ring_tail = 0;
+
+	ocx->regs = pcim_iomap_table(pdev)[0];
+	if (!ocx->regs) {
+		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		ret = -ENODEV;
+		goto err_free;
+	}
+
+	ocx->pdev = pdev;
+
+	for (i = 0; i < OCX_INTS; i++) {
+		ocx->msix_ent[i].entry = i;
+		ocx->msix_ent[i].vector = 0;
+	}
+
+	ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+		goto err_free;
+	}
+
+	for (i = 0; i < OCX_INTS; i++) {
+		ret = devm_request_threaded_irq(&pdev->dev,
+						ocx->msix_ent[i].vector,
+						(i == 3) ?
+						 thunderx_ocx_com_isr :
+						 thunderx_ocx_lnk_isr,
+						(i == 3) ?
+						 thunderx_ocx_com_threaded_isr :
+						 thunderx_ocx_lnk_threaded_isr,
+						0, "[EDAC] ThunderX OCX",
+						&ocx->msix_ent[i]);
+		if (ret)
+			goto err_free;
+	}
+
+	edac_dev->dev = &pdev->dev;
+	edac_dev->dev_name = dev_name(&pdev->dev);
+	edac_dev->mod_name = "thunderx-ocx";
+	edac_dev->ctl_name = "thunderx-ocx";
+
+	ret = edac_device_add_device(edac_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
+		goto err_free;
+	}
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+		ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
+
+		ret = thunderx_create_debugfs_nodes(ocx->debugfs,
+						    ocx_dfs_ents,
+						    ocx,
+						    ARRAY_SIZE(ocx_dfs_ents));
+		if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
+			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+				 ret, ret >= 0 ? " created" : "");
+		}
+	}
+
+	pci_set_drvdata(pdev, edac_dev);
+
+	thunderx_ocx_clearstats(ocx);
+
+	for (i = 0; i < OCX_RX_LANES; i++) {
+		writeq(OCX_LNE_INT_ENA_ALL,
+		       ocx->regs + OCX_LNE_INT_EN(i));
+
+		reg = readq(ocx->regs + OCX_LNE_INT(i));
+		writeq(reg, ocx->regs + OCX_LNE_INT(i));
+
+	}
+
+	for (i = 0; i < OCX_LINK_INTS; i++) {
+		reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
+		writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
+
+		writeq(OCX_COM_LINKX_INT_ENA_ALL,
+		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
+	}
+
+	reg = readq(ocx->regs + OCX_COM_INT);
+	writeq(reg, ocx->regs + OCX_COM_INT);
+
+	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
+
+	return 0;
+err_free:
+	edac_device_free_ctl_info(edac_dev);
+
+	return ret;
+}
+
+static void thunderx_ocx_remove(struct pci_dev *pdev)
+{
+	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
+	struct thunderx_ocx *ocx = edac_dev->pvt_info;
+	int i;
+
+	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
+
+	for (i = 0; i < OCX_INTS; i++) {
+		writeq(OCX_COM_LINKX_INT_ENA_ALL,
+		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
+	}
+
+	edac_debugfs_remove_recursive(ocx->debugfs);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(edac_dev);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);
+
+static struct pci_driver thunderx_ocx_driver = {
+	.name     = "thunderx_ocx_edac",
+	.probe    = thunderx_ocx_probe,
+	.remove   = thunderx_ocx_remove,
+	.id_table = thunderx_ocx_pci_tbl,
+};
+
+/*---------------------- L2C driver ---------------------------------*/
+
+#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
+#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
+#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030
+
+#define L2C_TAD_INT_W1C		0x40000
+#define L2C_TAD_INT_W1S		0x40008
+
+#define L2C_TAD_INT_ENA_W1C	0x40020
+#define L2C_TAD_INT_ENA_W1S	0x40028
+
+
+#define L2C_TAD_INT_L2DDBE	 BIT(1)
+#define L2C_TAD_INT_SBFSBE	 BIT(2)
+#define L2C_TAD_INT_SBFDBE	 BIT(3)
+#define L2C_TAD_INT_FBFSBE	 BIT(4)
+#define L2C_TAD_INT_FBFDBE	 BIT(5)
+#define L2C_TAD_INT_TAGDBE	 BIT(9)
+#define L2C_TAD_INT_RDDISLMC	 BIT(15)
+#define L2C_TAD_INT_WRDISLMC	 BIT(16)
+#define L2C_TAD_INT_LFBTO	 BIT(17)
+#define L2C_TAD_INT_GSYNCTO	 BIT(18)
+#define L2C_TAD_INT_RTGSBE	 BIT(32)
+#define L2C_TAD_INT_RTGDBE	 BIT(33)
+#define L2C_TAD_INT_RDDISOCI	 BIT(34)
+#define L2C_TAD_INT_WRDISOCI	 BIT(35)
+
+#define L2C_TAD_INT_ECC		(L2C_TAD_INT_L2DDBE | \
+				 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
+				 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)
+
+#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
+				 L2C_TAD_INT_FBFSBE)
+
+#define L2C_TAD_INT_UE          (L2C_TAD_INT_L2DDBE | \
+				 L2C_TAD_INT_SBFDBE | \
+				 L2C_TAD_INT_FBFDBE | \
+				 L2C_TAD_INT_TAGDBE | \
+				 L2C_TAD_INT_RTGDBE | \
+				 L2C_TAD_INT_WRDISOCI | \
+				 L2C_TAD_INT_RDDISOCI | \
+				 L2C_TAD_INT_WRDISLMC | \
+				 L2C_TAD_INT_RDDISLMC | \
+				 L2C_TAD_INT_LFBTO    | \
+				 L2C_TAD_INT_GSYNCTO)
+
+static const struct error_descr l2_tad_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = L2C_TAD_INT_SBFSBE,
+		.descr = "SBF single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = L2C_TAD_INT_FBFSBE,
+		.descr = "FBF single-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_L2DDBE,
+		.descr = "L2D double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_SBFDBE,
+		.descr = "SBF double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_FBFDBE,
+		.descr = "FBF double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_TAGDBE,
+		.descr = "TAG double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_RTGDBE,
+		.descr = "RTG double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_WRDISOCI,
+		.descr = "Write to a disabled CCPI",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_RDDISOCI,
+		.descr = "Read from a disabled CCPI",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_WRDISLMC,
+		.descr = "Write to a disabled LMC",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_RDDISLMC,
+		.descr = "Read from a disabled LMC",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_LFBTO,
+		.descr = "LFB entry timeout",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_TAD_INT_GSYNCTO,
+		.descr = "Global sync CCPI timeout",
+	},
+	{0, 0, NULL},
+};
+
+#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)
+
+#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)
+
+#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)
+
+#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)
+
+#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
+				 L2C_TAD_INT_RTG | \
+				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
+				 L2C_TAD_INT_LFBTO)
+
+#define L2C_TAD_TIMETWO		0x50000
+#define L2C_TAD_TIMEOUT		0x50100
+#define L2C_TAD_ERR		0x60000
+#define L2C_TAD_TQD_ERR		0x60100
+#define L2C_TAD_TTG_ERR		0x60200
+
+
+#define L2C_CBC_INT_W1C		0x60000
+
+#define L2C_CBC_INT_RSDSBE	 BIT(0)
+#define L2C_CBC_INT_RSDDBE	 BIT(1)
+
+#define L2C_CBC_INT_RSD		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)
+
+#define L2C_CBC_INT_MIBSBE	 BIT(4)
+#define L2C_CBC_INT_MIBDBE	 BIT(5)
+
+#define L2C_CBC_INT_MIB		 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)
+
+#define L2C_CBC_INT_IORDDISOCI	 BIT(6)
+#define L2C_CBC_INT_IOWRDISOCI	 BIT(7)
+
+#define L2C_CBC_INT_IODISOCI	 (L2C_CBC_INT_IORDDISOCI | \
+				  L2C_CBC_INT_IOWRDISOCI)
+
+#define L2C_CBC_INT_CE		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
+#define L2C_CBC_INT_UE		 (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
+
+
+static const struct error_descr l2_cbc_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = L2C_CBC_INT_RSDSBE,
+		.descr = "RSD single-bit error",
+	},
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = L2C_CBC_INT_MIBSBE,
+		.descr = "MIB single-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_CBC_INT_RSDDBE,
+		.descr = "RSD double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_CBC_INT_MIBDBE,
+		.descr = "MIB double-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_CBC_INT_IORDDISOCI,
+		.descr = "Read from a disabled CCPI",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_CBC_INT_IOWRDISOCI,
+		.descr = "Write to a disabled CCPI",
+	},
+	{0, 0, NULL},
+};
+
+#define L2C_CBC_INT_W1S		0x60008
+#define L2C_CBC_INT_ENA_W1C	0x60020
+
+#define L2C_CBC_INT_ENA_ALL	 (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
+				  L2C_CBC_INT_IODISOCI)
+
+#define L2C_CBC_INT_ENA_W1S	0x60028
+
+#define L2C_CBC_IODISOCIERR	0x80008
+#define L2C_CBC_IOCERR		0x80010
+#define L2C_CBC_RSDERR		0x80018
+#define L2C_CBC_MIBERR		0x80020
+
+
+#define L2C_MCI_INT_W1C		0x0
+
+#define L2C_MCI_INT_VBFSBE	 BIT(0)
+#define L2C_MCI_INT_VBFDBE	 BIT(1)
+
+static const struct error_descr l2_mci_errors[] = {
+	{
+		.type  = ERR_CORRECTED,
+		.mask  = L2C_MCI_INT_VBFSBE,
+		.descr = "VBF single-bit error",
+	},
+	{
+		.type  = ERR_UNCORRECTED,
+		.mask  = L2C_MCI_INT_VBFDBE,
+		.descr = "VBF double-bit error",
+	},
+	{0, 0, NULL},
+};
+
+#define L2C_MCI_INT_W1S		0x8
+#define L2C_MCI_INT_ENA_W1C	0x20
+
+#define L2C_MCI_INT_ENA_ALL	 (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)
+
+#define L2C_MCI_INT_ENA_W1S	0x28
+
+#define L2C_MCI_ERR		0x10000
+
+#define L2C_MESSAGE_SIZE	SZ_1K
+#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
+
+struct l2c_err_ctx {
+	char *reg_ext_name;
+	u64  reg_int;
+	u64  reg_ext;
+};
+
+struct thunderx_l2c {
+	void __iomem *regs;
+	struct pci_dev *pdev;
+	struct edac_device_ctl_info *edac_dev;
+
+	struct dentry *debugfs;
+
+	int index;
+
+	struct msix_entry msix_ent;
+
+	struct l2c_err_ctx err_ctx[RING_ENTRIES];
+	unsigned long ring_head;
+	unsigned long ring_tail;
+};
+
+static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
+	struct l2c_err_ctx *ctx = &tad->err_ctx[head];
+
+	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
+
+	if (ctx->reg_int & L2C_TAD_INT_ECC) {
+		ctx->reg_ext_name = "TQD_ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
+	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
+		ctx->reg_ext_name = "TTG_ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
+	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
+		ctx->reg_ext_name = "TIMEOUT";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
+	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
+		ctx->reg_ext_name = "ERR";
+		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
+	}
+
+	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
+
+	tad->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
+	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
+
+	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
+
+	if (ctx->reg_int & L2C_CBC_INT_RSD) {
+		ctx->reg_ext_name = "RSDERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
+	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
+		ctx->reg_ext_name = "MIBERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
+	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
+		ctx->reg_ext_name = "IODISOCIERR";
+		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
+	}
+
+	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
+
+	cbc->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
+	struct l2c_err_ctx *ctx = &mci->err_ctx[head];
+
+	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
+	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
+
+	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
+
+	ctx->reg_ext_name = "ERR";
+
+	mci->ring_head++;
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
+{
+	struct msix_entry *msix = irq_id;
+	struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
+						msix_ent);
+
+	unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
+	struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
+	irqreturn_t ret = IRQ_NONE;
+
+	u64 mask_ue, mask_ce;
+	const struct error_descr *l2_errors;
+	char *reg_int_name;
+
+	char *msg;
+	char *other;
+
+	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
+	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
+
+	if (!msg || !other)
+		goto err_free;
+
+	switch (l2c->pdev->device) {
+	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
+		reg_int_name = "L2C_TAD_INT";
+		mask_ue = L2C_TAD_INT_UE;
+		mask_ce = L2C_TAD_INT_CE;
+		l2_errors = l2_tad_errors;
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
+		reg_int_name = "L2C_CBC_INT";
+		mask_ue = L2C_CBC_INT_UE;
+		mask_ce = L2C_CBC_INT_CE;
+		l2_errors = l2_cbc_errors;
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
+		reg_int_name = "L2C_MCI_INT";
+		mask_ue = L2C_MCI_INT_VBFDBE;
+		mask_ce = L2C_MCI_INT_VBFSBE;
+		l2_errors = l2_mci_errors;
+		break;
+	default:
+		dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
+			l2c->pdev->device);
+		return IRQ_NONE;
+	}
+
+	while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
+			ARRAY_SIZE(l2c->err_ctx))) {
+		snprintf(msg, L2C_MESSAGE_SIZE,
+			 "%s: %s: %016llx, %s: %016llx",
+			 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
+			 ctx->reg_ext_name, ctx->reg_ext);
+
+		decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
+
+		strncat(msg, other, L2C_MESSAGE_SIZE);
+
+		if (ctx->reg_int & mask_ue)
+			edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
+		else if (ctx->reg_int & mask_ce)
+			edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
+
+		l2c->ring_tail++;
+	}
+
+	return IRQ_HANDLED;
+
+err_free:
+	kfree(other);
+	kfree(msg);
+
+	return ret;
+}
+
+#define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)
+
+L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
+
+struct debugfs_entry *l2c_tad_dfs_ents[] = {
+	&debugfs_tad_int,
+};
+
+L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
+
+struct debugfs_entry *l2c_cbc_dfs_ents[] = {
+	&debugfs_cbc_int,
+};
+
+L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
+
+struct debugfs_entry *l2c_mci_dfs_ents[] = {
+	&debugfs_mci_int,
+};
+
+static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
+	{ 0, },
+};
+
+static int thunderx_l2c_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	struct thunderx_l2c *l2c;
+	struct edac_device_ctl_info *edac_dev;
+	struct debugfs_entry **l2c_devattr;
+	size_t dfs_entries;
+	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
+	char name[32];
+	const char *fmt;
+	u64 reg_en_offs, reg_en_mask;
+	int idx;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		return ret;
+	}
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
+		thunderx_l2c_isr = thunderx_l2c_tad_isr;
+		l2c_devattr = l2c_tad_dfs_ents;
+		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
+		fmt = "L2C-TAD%d";
+		reg_en_offs = L2C_TAD_INT_ENA_W1S;
+		reg_en_mask = L2C_TAD_INT_ENA_ALL;
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
+		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
+		l2c_devattr = l2c_cbc_dfs_ents;
+		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
+		fmt = "L2C-CBC%d";
+		reg_en_offs = L2C_CBC_INT_ENA_W1S;
+		reg_en_mask = L2C_CBC_INT_ENA_ALL;
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
+		thunderx_l2c_isr = thunderx_l2c_mci_isr;
+		l2c_devattr = l2c_mci_dfs_ents;
+		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
+		fmt = "L2C-MCI%d";
+		reg_en_offs = L2C_MCI_INT_ENA_W1S;
+		reg_en_mask = L2C_MCI_INT_ENA_ALL;
+		break;
+	default:
+		//Should never ever get here
+		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
+			pdev->device);
+		return -EINVAL;
+	}
+
+	idx = edac_device_alloc_index();
+	snprintf(name, sizeof(name), fmt, idx);
+
+	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
+					      name, 1, "L2C", 1, 0,
+					      NULL, 0, idx);
+	if (!edac_dev) {
+		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
+		return -ENOMEM;
+	}
+
+	l2c = edac_dev->pvt_info;
+	l2c->edac_dev = edac_dev;
+
+	l2c->regs = pcim_iomap_table(pdev)[0];
+	if (!l2c->regs) {
+		dev_err(&pdev->dev, "Cannot map PCI resources\n");
+		ret = -ENODEV;
+		goto err_free;
+	}
+
+	l2c->pdev = pdev;
+
+	l2c->ring_head = 0;
+	l2c->ring_tail = 0;
+
+	l2c->msix_ent.entry = 0;
+	l2c->msix_ent.vector = 0;
+
+	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+		goto err_free;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
+					thunderx_l2c_isr,
+					thunderx_l2c_threaded_isr,
+					0, "[EDAC] ThunderX L2C",
+					&l2c->msix_ent);
+	if (ret)
+		goto err_free;
+
+	edac_dev->dev = &pdev->dev;
+	edac_dev->dev_name = dev_name(&pdev->dev);
+	edac_dev->mod_name = "thunderx-l2c";
+	edac_dev->ctl_name = "thunderx-l2c";
+
+	ret = edac_device_add_device(edac_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
+		goto err_free;
+	}
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
+
+		thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+					      l2c, dfs_entries);
+
+		if (ret != dfs_entries) {
+			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+				 ret, ret >= 0 ? " created" : "");
+		}
+	}
+
+	pci_set_drvdata(pdev, edac_dev);
+
+	writeq(reg_en_mask, l2c->regs + reg_en_offs);
+
+	return 0;
+
+err_free:
+	edac_device_free_ctl_info(edac_dev);
+
+	return ret;
+}
+
+static void thunderx_l2c_remove(struct pci_dev *pdev)
+{
+	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
+	struct thunderx_l2c *l2c = edac_dev->pvt_info;
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
+		writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
+		writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
+		break;
+	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
+		writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
+		break;
+	}
+
+	edac_debugfs_remove_recursive(l2c->debugfs);
+
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(edac_dev);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);
+
+static struct pci_driver thunderx_l2c_driver = {
+	.name     = "thunderx_l2c_edac",
+	.probe    = thunderx_l2c_probe,
+	.remove   = thunderx_l2c_remove,
+	.id_table = thunderx_l2c_pci_tbl,
+};
+
+static int __init thunderx_edac_init(void)
+{
+	int rc = 0;
+
+	rc = pci_register_driver(&thunderx_lmc_driver);
+	if (rc)
+		return rc;
+
+	rc = pci_register_driver(&thunderx_ocx_driver);
+	if (rc)
+		goto err_lmc;
+
+	rc = pci_register_driver(&thunderx_l2c_driver);
+	if (rc)
+		goto err_ocx;
+
+	return rc;
+err_ocx:
+	pci_unregister_driver(&thunderx_ocx_driver);
+err_lmc:
+	pci_unregister_driver(&thunderx_lmc_driver);
+
+	return rc;
+}
+
+static void __exit thunderx_edac_exit(void)
+{
+	pci_unregister_driver(&thunderx_l2c_driver);
+	pci_unregister_driver(&thunderx_ocx_driver);
+	pci_unregister_driver(&thunderx_lmc_driver);
+
+}
+
+module_init(thunderx_edac_init);
+module_exit(thunderx_edac_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Cavium, Inc.");
+MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
diff --git a/drivers/extcon/devres.c b/drivers/extcon/devres.c
index b40eb1805927..186fd735eb28 100644
--- a/drivers/extcon/devres.c
+++ b/drivers/extcon/devres.c
@@ -50,6 +50,13 @@ static void devm_extcon_dev_notifier_unreg(struct device *dev, void *res)
 	extcon_unregister_notifier(this->edev, this->id, this->nb);
 }
 
+static void devm_extcon_dev_notifier_all_unreg(struct device *dev, void *res)
+{
+	struct extcon_dev_notifier_devres *this = res;
+
+	extcon_unregister_notifier_all(this->edev, this->nb);
+}
+
 /**
  * devm_extcon_dev_allocate - Allocate managed extcon device
  * @dev:		device owning the extcon device being created
@@ -214,3 +221,57 @@ void devm_extcon_unregister_notifier(struct device *dev,
 			       devm_extcon_dev_match, edev));
 }
 EXPORT_SYMBOL(devm_extcon_unregister_notifier);
+
+/**
+ * devm_extcon_register_notifier_all()
+ *		- Resource-managed extcon_register_notifier_all()
+ * @dev:	device to allocate extcon device
+ * @edev:	the extcon device that has the external connecotr.
+ * @nb:		a notifier block to be registered.
+ *
+ * This function manages automatically the notifier of extcon device using
+ * device resource management and simplify the control of unregistering
+ * the notifier of extcon device. To get more information, refer that function.
+ *
+ * Returns 0 if success or negaive error number if failure.
+ */
+int devm_extcon_register_notifier_all(struct device *dev, struct extcon_dev *edev,
+				struct notifier_block *nb)
+{
+	struct extcon_dev_notifier_devres *ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_extcon_dev_notifier_all_unreg, sizeof(*ptr),
+				GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = extcon_register_notifier_all(edev, nb);
+	if (ret) {
+		devres_free(ptr);
+		return ret;
+	}
+
+	ptr->edev = edev;
+	ptr->nb = nb;
+	devres_add(dev, ptr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_extcon_register_notifier_all);
+
+/**
+ * devm_extcon_unregister_notifier_all()
+ *		- Resource-managed extcon_unregister_notifier_all()
+ * @dev:	device to allocate extcon device
+ * @edev:	the extcon device that has the external connecotr.
+ * @nb:		a notifier block to be registered.
+ */
+void devm_extcon_unregister_notifier_all(struct device *dev,
+				struct extcon_dev *edev,
+				struct notifier_block *nb)
+{
+	WARN_ON(devres_release(dev, devm_extcon_dev_notifier_all_unreg,
+			       devm_extcon_dev_match, edev));
+}
+EXPORT_SYMBOL(devm_extcon_unregister_notifier_all);
diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index 09ac5e70c2f3..e7750545469f 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -448,8 +448,19 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id)
 	spin_lock_irqsave(&edev->lock, flags);
 
 	state = !!(edev->state & BIT(index));
+
+	/*
+	 * Call functions in a raw notifier chain for the specific one
+	 * external connector.
+	 */
 	raw_notifier_call_chain(&edev->nh[index], state, edev);
 
+	/*
+	 * Call functions in a raw notifier chain for the all supported
+	 * external connectors.
+	 */
+	raw_notifier_call_chain(&edev->nh_all, state, edev);
+
 	/* This could be in interrupt handler */
 	prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
 	if (!prop_buf) {
@@ -954,6 +965,59 @@ int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id,
 }
 EXPORT_SYMBOL_GPL(extcon_unregister_notifier);
 
+/**
+ * extcon_register_notifier_all() - Register a notifier block for all connectors
+ * @edev:	the extcon device that has the external connecotr.
+ * @nb:		a notifier block to be registered.
+ *
+ * This fucntion registers a notifier block in order to receive the state
+ * change of all supported external connectors from extcon device.
+ * And The second parameter given to the callback of nb (val) is
+ * the current state and third parameter is the edev pointer.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_register_notifier_all(struct extcon_dev *edev,
+				struct notifier_block *nb)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!edev || !nb)
+		return -EINVAL;
+
+	spin_lock_irqsave(&edev->lock, flags);
+	ret = raw_notifier_chain_register(&edev->nh_all, nb);
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(extcon_register_notifier_all);
+
+/**
+ * extcon_unregister_notifier_all() - Unregister a notifier block from extcon.
+ * @edev:	the extcon device that has the external connecotr.
+ * @nb:		a notifier block to be registered.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_unregister_notifier_all(struct extcon_dev *edev,
+				struct notifier_block *nb)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!edev || !nb)
+		return -EINVAL;
+
+	spin_lock_irqsave(&edev->lock, flags);
+	ret = raw_notifier_chain_unregister(&edev->nh_all, nb);
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(extcon_unregister_notifier_all);
+
 static struct attribute *extcon_attrs[] = {
 	&dev_attr_state.attr,
 	&dev_attr_name.attr,
@@ -1212,6 +1276,8 @@ int extcon_dev_register(struct extcon_dev *edev)
 	for (index = 0; index < edev->max_supported; index++)
 		RAW_INIT_NOTIFIER_HEAD(&edev->nh[index]);
 
+	RAW_INIT_NOTIFIER_HEAD(&edev->nh_all);
+
 	dev_set_drvdata(&edev->dev, edev);
 	edev->state = 0;
 
diff --git a/drivers/extcon/extcon.h b/drivers/extcon/extcon.h
index 993ddccafe11..dddddcfa0587 100644
--- a/drivers/extcon/extcon.h
+++ b/drivers/extcon/extcon.h
@@ -21,6 +21,8 @@
  * @dev:		Device of this extcon.
  * @state:		Attach/detach state of this extcon. Do not provide at
  *			register-time.
+ * @nh_all:		Notifier for the state change events for all supported
+ *			external connectors from this extcon.
  * @nh:			Notifier for the state change events from this extcon
  * @entry:		To support list of extcon devices so that users can
  *			search for extcon devices based on the extcon name.
@@ -43,6 +45,7 @@ struct extcon_dev {
 
 	/* Internal data. Please do not set. */
 	struct device dev;
+	struct raw_notifier_head nh_all;
 	struct raw_notifier_head *nh;
 	struct list_head entry;
 	int max_supported;
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 94250c293be2..c68ac65db7ff 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2006,7 +2006,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
 		return;
 	case HID_DG_TOOLSERIALNUMBER:
 		wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL);
-		wacom_wac->serial[0] |= value;
+		wacom_wac->serial[0] |= (__u32)value;
 		return;
 	case WACOM_HID_WD_SENSE:
 		wacom_wac->hid_data.sense_state = value;
@@ -2176,6 +2176,16 @@ static void wacom_wac_finger_usage_mapping(struct hid_device *hdev,
 		wacom_wac->hid_data.cc_index = field->index;
 		wacom_wac->hid_data.cc_value_index = usage->usage_index;
 		break;
+	case HID_DG_CONTACTID:
+		if ((field->logical_maximum - field->logical_minimum) < touch_max) {
+			/*
+			 * The HID descriptor for G11 sensors leaves logical
+			 * maximum set to '1' despite it being a multitouch
+			 * device. Override to a sensible number.
+			 */
+			field->logical_maximum = 255;
+		}
+		break;
 	}
 }
 
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 7ef819680acd..26b05106f0d3 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -980,7 +980,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 	/* Pad to 32-bits - FIXME: Revisit*/
 	if ((skb->len & 3) && skb_pad(skb, 4 - (skb->len & 3)))
-		goto drop;
+		goto inc_dropped;
 
 	/*
 	 * Modem sends Phonet messages over SSI with its own endianess...
@@ -1032,8 +1032,9 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
 drop2:
 	hsi_free_msg(msg);
 drop:
-	dev->stats.tx_dropped++;
 	dev_kfree_skb(skb);
+inc_dropped:
+	dev->stats.tx_dropped++;
 
 	return 0;
 }
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0649d53f3d16..22d5eafd6815 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -341,6 +341,15 @@ config SENSORS_ASB100
 	  This driver can also be built as a module.  If so, the module
 	  will be called asb100.
 
+config SENSORS_ASPEED
+	tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver"
+	help
+	  This driver provides support for ASPEED AST2400/AST2500 PWM
+	  and Fan Tacho controllers.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called aspeed_pwm_tacho.
+
 config SENSORS_ATXP1
 	tristate "Attansic ATXP1 VID controller"
 	depends on I2C
@@ -1643,16 +1652,6 @@ config SENSORS_TMP421
 	  This driver can also be built as a module.  If so, the module
 	  will be called tmp421.
 
-config SENSORS_TWL4030_MADC
-	tristate "Texas Instruments TWL4030 MADC Hwmon"
-	depends on TWL4030_MADC
-	help
-	If you say yes here you get hwmon support for triton
-	TWL4030-MADC.
-
-	This driver can also be built as a module. If so it will be called
-	twl4030-madc-hwmon.
-
 config SENSORS_VEXPRESS
 	tristate "Versatile Express"
 	depends on VEXPRESS_CONFIG
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 5509edf6186a..d4641a9f16c1 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
 obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
 obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
 obj-$(CONFIG_SENSORS_ASC7621)	+= asc7621.o
+obj-$(CONFIG_SENSORS_ASPEED)	+= aspeed-pwm-tacho.o
 obj-$(CONFIG_SENSORS_ATXP1)	+= atxp1.o
 obj-$(CONFIG_SENSORS_CORETEMP)	+= coretemp.o
 obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
@@ -157,7 +158,6 @@ obj-$(CONFIG_SENSORS_TMP103)	+= tmp103.o
 obj-$(CONFIG_SENSORS_TMP108)	+= tmp108.o
 obj-$(CONFIG_SENSORS_TMP401)	+= tmp401.o
 obj-$(CONFIG_SENSORS_TMP421)	+= tmp421.o
-obj-$(CONFIG_SENSORS_TWL4030_MADC)+= twl4030-madc-hwmon.o
 obj-$(CONFIG_SENSORS_VEXPRESS)	+= vexpress-hwmon.o
 obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
 obj-$(CONFIG_SENSORS_VIA686A)	+= via686a.o
diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c
index 763490acc0df..cec227f13874 100644
--- a/drivers/hwmon/ad7414.c
+++ b/drivers/hwmon/ad7414.c
@@ -217,9 +217,16 @@ static const struct i2c_device_id ad7414_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ad7414_id);
 
+static const struct of_device_id ad7414_of_match[] = {
+	{ .compatible = "ad,ad7414" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ad7414_of_match);
+
 static struct i2c_driver ad7414_driver = {
 	.driver = {
 		.name	= "ad7414",
+		.of_match_table = of_match_ptr(ad7414_of_match),
 	},
 	.probe	= ad7414_probe,
 	.id_table = ad7414_id,
diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c
index bbe3a5c5b3f5..a557b46dbe8e 100644
--- a/drivers/hwmon/adc128d818.c
+++ b/drivers/hwmon/adc128d818.c
@@ -546,10 +546,17 @@ static const struct i2c_device_id adc128_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, adc128_id);
 
+static const struct of_device_id adc128_of_match[] = {
+	{ .compatible = "ti,adc128d818" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, adc128_of_match);
+
 static struct i2c_driver adc128_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "adc128d818",
+		.of_match_table = of_match_ptr(adc128_of_match),
 	},
 	.probe		= adc128_probe,
 	.remove		= adc128_remove,
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 2b3105c8aed3..5140c27d16dd 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -31,6 +31,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/of.h>
 
 #include <linux/i2c/ads1015.h>
@@ -268,7 +269,12 @@ static int ads1015_probe(struct i2c_client *client,
 			    GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
-	data->id = id->driver_data;
+
+	if (client->dev.of_node)
+		data->id = (enum ads1015_chips)
+			of_device_get_match_data(&client->dev);
+	else
+		data->id = id->driver_data;
 	i2c_set_clientdata(client, data);
 	mutex_init(&data->update_lock);
 
@@ -303,9 +309,23 @@ static const struct i2c_device_id ads1015_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ads1015_id);
 
+static const struct of_device_id ads1015_of_match[] = {
+	{
+		.compatible = "ti,ads1015",
+		.data = (void *)ads1015
+	},
+	{
+		.compatible = "ti,ads1115",
+		.data = (void *)ads1115
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ads1015_of_match);
+
 static struct i2c_driver ads1015_driver = {
 	.driver = {
 		.name = "ads1015",
+		.of_match_table = of_match_ptr(ads1015_of_match),
 	},
 	.probe = ads1015_probe,
 	.remove = ads1015_remove,
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index ee396ff167d9..898607bf682b 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -31,9 +31,11 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_data/ads7828.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
+#include <linux/regulator/consumer.h>
 
 /* The ADS7828 registers */
 #define ADS7828_CMD_SD_SE	0x80	/* Single ended inputs */
@@ -118,9 +120,12 @@ static int ads7828_probe(struct i2c_client *client,
 	struct ads7828_data *data;
 	struct device *hwmon_dev;
 	unsigned int vref_mv = ADS7828_INT_VREF_MV;
+	unsigned int vref_uv;
 	bool diff_input = false;
 	bool ext_vref = false;
 	unsigned int regval;
+	enum ads7828_chips chip;
+	struct regulator *reg;
 
 	data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL);
 	if (!data)
@@ -131,14 +136,32 @@ static int ads7828_probe(struct i2c_client *client,
 		ext_vref = pdata->ext_vref;
 		if (ext_vref && pdata->vref_mv)
 			vref_mv = pdata->vref_mv;
+	} else if (dev->of_node) {
+		diff_input = of_property_read_bool(dev->of_node,
+						   "ti,differential-input");
+		reg = devm_regulator_get_optional(dev, "vref");
+		if (!IS_ERR(reg)) {
+			vref_uv = regulator_get_voltage(reg);
+			vref_mv = DIV_ROUND_CLOSEST(vref_uv, 1000);
+			if (vref_mv < ADS7828_EXT_VREF_MV_MIN ||
+			    vref_mv > ADS7828_EXT_VREF_MV_MAX)
+				return -EINVAL;
+			ext_vref = true;
+		}
 	}
 
+	if (client->dev.of_node)
+		chip = (enum ads7828_chips)
+			of_device_get_match_data(&client->dev);
+	else
+		chip = id->driver_data;
+
 	/* Bound Vref with min/max values */
 	vref_mv = clamp_val(vref_mv, ADS7828_EXT_VREF_MV_MIN,
 			    ADS7828_EXT_VREF_MV_MAX);
 
 	/* ADS7828 uses 12-bit samples, while ADS7830 is 8-bit */
-	if (id->driver_data == ads7828) {
+	if (chip == ads7828) {
 		data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 4096);
 		data->regmap = devm_regmap_init_i2c(client,
 						    &ads2828_regmap_config);
@@ -177,9 +200,23 @@ static const struct i2c_device_id ads7828_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ads7828_device_ids);
 
+static const struct of_device_id ads7828_of_match[] = {
+	{
+		.compatible = "ti,ads7828",
+		.data = (void *)ads7828
+	},
+	{
+		.compatible = "ti,ads7830",
+		.data = (void *)ads7830
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ads7828_of_match);
+
 static struct i2c_driver ads7828_driver = {
 	.driver = {
 		.name = "ads7828",
+		.of_match_table = of_match_ptr(ads7828_of_match),
 	},
 
 	.id_table = ads7828_device_ids,
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index c646670b9ea9..c803e3c5fcd4 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
@@ -58,6 +59,8 @@
 #define REG_VENDID		0x3E
 #define REG_DEVID2		0x3F
 
+#define REG_CONFIG1		0x40
+
 #define REG_STATUS1		0x41
 #define REG_STATUS2		0x42
 
@@ -161,6 +164,27 @@ static const struct i2c_device_id adt7475_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, adt7475_id);
 
+static const struct of_device_id adt7475_of_match[] = {
+	{
+		.compatible = "adi,adt7473",
+		.data = (void *)adt7473
+	},
+	{
+		.compatible = "adi,adt7475",
+		.data = (void *)adt7475
+	},
+	{
+		.compatible = "adi,adt7476",
+		.data = (void *)adt7476
+	},
+	{
+		.compatible = "adi,adt7490",
+		.data = (void *)adt7490
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, adt7475_of_match);
+
 struct adt7475_data {
 	struct device *hwmon_dev;
 	struct mutex lock;
@@ -1250,6 +1274,7 @@ static void adt7475_remove_files(struct i2c_client *client,
 static int adt7475_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
+	enum chips chip;
 	static const char * const names[] = {
 		[adt7473] = "ADT7473",
 		[adt7475] = "ADT7475",
@@ -1268,8 +1293,13 @@ static int adt7475_probe(struct i2c_client *client,
 	mutex_init(&data->lock);
 	i2c_set_clientdata(client, data);
 
+	if (client->dev.of_node)
+		chip = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		chip = id->driver_data;
+
 	/* Initialize device-specific values */
-	switch (id->driver_data) {
+	switch (chip) {
 	case adt7476:
 		data->has_voltage = 0x0e;	/* in1 to in3 */
 		revision = adt7475_read(REG_DEVID2) & 0x07;
@@ -1343,6 +1373,17 @@ static int adt7475_probe(struct i2c_client *client,
 	for (i = 0; i < ADT7475_PWM_COUNT; i++)
 		adt7475_read_pwm(client, i);
 
+	/* Start monitoring */
+	switch (chip) {
+	case adt7475:
+	case adt7476:
+		i2c_smbus_write_byte_data(client, REG_CONFIG1,
+					  adt7475_read(REG_CONFIG1) | 0x01);
+		break;
+	default:
+		break;
+	}
+
 	ret = sysfs_create_group(&client->dev.kobj, &adt7475_attr_group);
 	if (ret)
 		return ret;
@@ -1428,6 +1469,7 @@ static struct i2c_driver adt7475_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "adt7475",
+		.of_match_table = of_match_ptr(adt7475_of_match),
 	},
 	.probe		= adt7475_probe,
 	.remove		= adt7475_remove,
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c
new file mode 100644
index 000000000000..48403a2115be
--- /dev/null
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2016 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or later as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/delay.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/sysfs.h>
+#include <linux/regmap.h>
+
+/* ASPEED PWM & FAN Tach Register Definition */
+#define ASPEED_PTCR_CTRL		0x00
+#define ASPEED_PTCR_CLK_CTRL		0x04
+#define ASPEED_PTCR_DUTY0_CTRL		0x08
+#define ASPEED_PTCR_DUTY1_CTRL		0x0c
+#define ASPEED_PTCR_TYPEM_CTRL		0x10
+#define ASPEED_PTCR_TYPEM_CTRL1		0x14
+#define ASPEED_PTCR_TYPEN_CTRL		0x18
+#define ASPEED_PTCR_TYPEN_CTRL1		0x1c
+#define ASPEED_PTCR_TACH_SOURCE		0x20
+#define ASPEED_PTCR_TRIGGER		0x28
+#define ASPEED_PTCR_RESULT		0x2c
+#define ASPEED_PTCR_INTR_CTRL		0x30
+#define ASPEED_PTCR_INTR_STS		0x34
+#define ASPEED_PTCR_TYPEM_LIMIT		0x38
+#define ASPEED_PTCR_TYPEN_LIMIT		0x3C
+#define ASPEED_PTCR_CTRL_EXT		0x40
+#define ASPEED_PTCR_CLK_CTRL_EXT	0x44
+#define ASPEED_PTCR_DUTY2_CTRL		0x48
+#define ASPEED_PTCR_DUTY3_CTRL		0x4c
+#define ASPEED_PTCR_TYPEO_CTRL		0x50
+#define ASPEED_PTCR_TYPEO_CTRL1		0x54
+#define ASPEED_PTCR_TACH_SOURCE_EXT	0x60
+#define ASPEED_PTCR_TYPEO_LIMIT		0x78
+
+/* ASPEED_PTCR_CTRL : 0x00 - General Control Register */
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1	15
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2	6
+#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK	(BIT(7) | BIT(15))
+
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1	14
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2	5
+#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK	(BIT(6) | BIT(14))
+
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1	13
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2	4
+#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK	(BIT(5) | BIT(13))
+
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1	12
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2	3
+#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK	(BIT(4) | BIT(12))
+
+#define	ASPEED_PTCR_CTRL_FAN_NUM_EN(x)	BIT(16 + (x))
+
+#define	ASPEED_PTCR_CTRL_PWMD_EN	BIT(11)
+#define	ASPEED_PTCR_CTRL_PWMC_EN	BIT(10)
+#define	ASPEED_PTCR_CTRL_PWMB_EN	BIT(9)
+#define	ASPEED_PTCR_CTRL_PWMA_EN	BIT(8)
+
+#define	ASPEED_PTCR_CTRL_CLK_SRC	BIT(1)
+#define	ASPEED_PTCR_CTRL_CLK_EN		BIT(0)
+
+/* ASPEED_PTCR_CLK_CTRL : 0x04 - Clock Control Register */
+/* TYPE N */
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_MASK		GENMASK(31, 16)
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT		24
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_H		20
+#define ASPEED_PTCR_CLK_CTRL_TYPEN_L		16
+/* TYPE M */
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_MASK         GENMASK(15, 0)
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT		8
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_H		4
+#define ASPEED_PTCR_CLK_CTRL_TYPEM_L		0
+
+/*
+ * ASPEED_PTCR_DUTY_CTRL/1/2/3 : 0x08/0x0C/0x48/0x4C - PWM-FAN duty control
+ * 0/1/2/3 register
+ */
+#define DUTY_CTRL_PWM2_FALL_POINT	24
+#define DUTY_CTRL_PWM2_RISE_POINT	16
+#define DUTY_CTRL_PWM2_RISE_FALL_MASK	GENMASK(31, 16)
+#define DUTY_CTRL_PWM1_FALL_POINT	8
+#define DUTY_CTRL_PWM1_RISE_POINT	0
+#define DUTY_CTRL_PWM1_RISE_FALL_MASK   GENMASK(15, 0)
+
+/* ASPEED_PTCR_TYPEM_CTRL : 0x10/0x18/0x50 - Type M/N/O Ctrl 0 Register */
+#define TYPE_CTRL_FAN_MASK		(GENMASK(5, 1) | GENMASK(31, 16))
+#define TYPE_CTRL_FAN1_MASK		GENMASK(31, 0)
+#define TYPE_CTRL_FAN_PERIOD		16
+#define TYPE_CTRL_FAN_MODE		4
+#define TYPE_CTRL_FAN_DIVISION		1
+#define TYPE_CTRL_FAN_TYPE_EN		1
+
+/* ASPEED_PTCR_TACH_SOURCE : 0x20/0x60 - Tach Source Register */
+/* bit [0,1] at 0x20, bit [2] at 0x60 */
+#define TACH_PWM_SOURCE_BIT01(x)	((x) * 2)
+#define TACH_PWM_SOURCE_BIT2(x)		((x) * 2)
+#define TACH_PWM_SOURCE_MASK_BIT01(x)	(0x3 << ((x) * 2))
+#define TACH_PWM_SOURCE_MASK_BIT2(x)	BIT((x) * 2)
+
+/* ASPEED_PTCR_RESULT : 0x2c - Result Register */
+#define RESULT_STATUS_MASK		BIT(31)
+#define RESULT_VALUE_MASK		0xfffff
+
+/* ASPEED_PTCR_CTRL_EXT : 0x40 - General Control Extension #1 Register */
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1	15
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2	6
+#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK	(BIT(7) | BIT(15))
+
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1	14
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2	5
+#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK	(BIT(6) | BIT(14))
+
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1	13
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2	4
+#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK	(BIT(5) | BIT(13))
+
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1	12
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2	3
+#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK	(BIT(4) | BIT(12))
+
+#define	ASPEED_PTCR_CTRL_PWMH_EN	BIT(11)
+#define	ASPEED_PTCR_CTRL_PWMG_EN	BIT(10)
+#define	ASPEED_PTCR_CTRL_PWMF_EN	BIT(9)
+#define	ASPEED_PTCR_CTRL_PWME_EN	BIT(8)
+
+/* ASPEED_PTCR_CLK_EXT_CTRL : 0x44 - Clock Control Extension #1 Register */
+/* TYPE O */
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_MASK         GENMASK(15, 0)
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT		8
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_H		4
+#define ASPEED_PTCR_CLK_CTRL_TYPEO_L		0
+
+#define PWM_MAX 255
+
+#define M_PWM_DIV_H 0x00
+#define M_PWM_DIV_L 0x05
+#define M_PWM_PERIOD 0x5F
+#define M_TACH_CLK_DIV 0x00
+#define M_TACH_MODE 0x00
+#define M_TACH_UNIT 0x1000
+#define INIT_FAN_CTRL 0xFF
+
+struct aspeed_pwm_tacho_data {
+	struct regmap *regmap;
+	unsigned long clk_freq;
+	bool pwm_present[8];
+	bool fan_tach_present[16];
+	u8 type_pwm_clock_unit[3];
+	u8 type_pwm_clock_division_h[3];
+	u8 type_pwm_clock_division_l[3];
+	u8 type_fan_tach_clock_division[3];
+	u16 type_fan_tach_unit[3];
+	u8 pwm_port_type[8];
+	u8 pwm_port_fan_ctrl[8];
+	u8 fan_tach_ch_source[16];
+	const struct attribute_group *groups[3];
+};
+
+enum type { TYPEM, TYPEN, TYPEO };
+
+struct type_params {
+	u32 l_value;
+	u32 h_value;
+	u32 unit_value;
+	u32 clk_ctrl_mask;
+	u32 clk_ctrl_reg;
+	u32 ctrl_reg;
+	u32 ctrl_reg1;
+};
+
+static const struct type_params type_params[] = {
+	[TYPEM] = {
+		.l_value = ASPEED_PTCR_CLK_CTRL_TYPEM_L,
+		.h_value = ASPEED_PTCR_CLK_CTRL_TYPEM_H,
+		.unit_value = ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT,
+		.clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEM_MASK,
+		.clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL,
+		.ctrl_reg = ASPEED_PTCR_TYPEM_CTRL,
+		.ctrl_reg1 = ASPEED_PTCR_TYPEM_CTRL1,
+	},
+	[TYPEN] = {
+		.l_value = ASPEED_PTCR_CLK_CTRL_TYPEN_L,
+		.h_value = ASPEED_PTCR_CLK_CTRL_TYPEN_H,
+		.unit_value = ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT,
+		.clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEN_MASK,
+		.clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL,
+		.ctrl_reg = ASPEED_PTCR_TYPEN_CTRL,
+		.ctrl_reg1 = ASPEED_PTCR_TYPEN_CTRL1,
+	},
+	[TYPEO] = {
+		.l_value = ASPEED_PTCR_CLK_CTRL_TYPEO_L,
+		.h_value = ASPEED_PTCR_CLK_CTRL_TYPEO_H,
+		.unit_value = ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT,
+		.clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEO_MASK,
+		.clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL_EXT,
+		.ctrl_reg = ASPEED_PTCR_TYPEO_CTRL,
+		.ctrl_reg1 = ASPEED_PTCR_TYPEO_CTRL1,
+	}
+};
+
+enum pwm_port { PWMA, PWMB, PWMC, PWMD, PWME, PWMF, PWMG, PWMH };
+
+struct pwm_port_params {
+	u32 pwm_en;
+	u32 ctrl_reg;
+	u32 type_part1;
+	u32 type_part2;
+	u32 type_mask;
+	u32 duty_ctrl_rise_point;
+	u32 duty_ctrl_fall_point;
+	u32 duty_ctrl_reg;
+	u32 duty_ctrl_rise_fall_mask;
+};
+
+static const struct pwm_port_params pwm_port_params[] = {
+	[PWMA] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMA_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+	},
+	[PWMB] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMB_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+	},
+	[PWMC] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMC_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+	},
+	[PWMD] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMD_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+	},
+	[PWME] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWME_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+	},
+	[PWMF] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMF_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+	},
+	[PWMG] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMG_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK,
+	},
+	[PWMH] = {
+		.pwm_en = ASPEED_PTCR_CTRL_PWMH_EN,
+		.ctrl_reg = ASPEED_PTCR_CTRL_EXT,
+		.type_part1 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1,
+		.type_part2 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2,
+		.type_mask = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK,
+		.duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT,
+		.duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT,
+		.duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL,
+		.duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK,
+	}
+};
+
+static int regmap_aspeed_pwm_tacho_reg_write(void *context, unsigned int reg,
+					     unsigned int val)
+{
+	void __iomem *regs = (void __iomem *)context;
+
+	writel(val, regs + reg);
+	return 0;
+}
+
+static int regmap_aspeed_pwm_tacho_reg_read(void *context, unsigned int reg,
+					    unsigned int *val)
+{
+	void __iomem *regs = (void __iomem *)context;
+
+	*val = readl(regs + reg);
+	return 0;
+}
+
+static const struct regmap_config aspeed_pwm_tacho_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = ASPEED_PTCR_TYPEO_LIMIT,
+	.reg_write = regmap_aspeed_pwm_tacho_reg_write,
+	.reg_read = regmap_aspeed_pwm_tacho_reg_read,
+	.fast_io = true,
+};
+
+static void aspeed_set_clock_enable(struct regmap *regmap, bool val)
+{
+	regmap_update_bits(regmap, ASPEED_PTCR_CTRL,
+			   ASPEED_PTCR_CTRL_CLK_EN,
+			   val ? ASPEED_PTCR_CTRL_CLK_EN : 0);
+}
+
+static void aspeed_set_clock_source(struct regmap *regmap, int val)
+{
+	regmap_update_bits(regmap, ASPEED_PTCR_CTRL,
+			   ASPEED_PTCR_CTRL_CLK_SRC,
+			   val ? ASPEED_PTCR_CTRL_CLK_SRC : 0);
+}
+
+static void aspeed_set_pwm_clock_values(struct regmap *regmap, u8 type,
+					u8 div_high, u8 div_low, u8 unit)
+{
+	u32 reg_value = ((div_high << type_params[type].h_value) |
+			 (div_low << type_params[type].l_value) |
+			 (unit << type_params[type].unit_value));
+
+	regmap_update_bits(regmap, type_params[type].clk_ctrl_reg,
+			   type_params[type].clk_ctrl_mask, reg_value);
+}
+
+static void aspeed_set_pwm_port_enable(struct regmap *regmap, u8 pwm_port,
+				       bool enable)
+{
+	regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg,
+			   pwm_port_params[pwm_port].pwm_en,
+			   enable ? pwm_port_params[pwm_port].pwm_en : 0);
+}
+
+static void aspeed_set_pwm_port_type(struct regmap *regmap,
+				     u8 pwm_port, u8 type)
+{
+	u32 reg_value = (type & 0x1) << pwm_port_params[pwm_port].type_part1;
+
+	reg_value |= (type & 0x2) << pwm_port_params[pwm_port].type_part2;
+
+	regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg,
+			   pwm_port_params[pwm_port].type_mask, reg_value);
+}
+
+static void aspeed_set_pwm_port_duty_rising_falling(struct regmap *regmap,
+						    u8 pwm_port, u8 rising,
+						    u8 falling)
+{
+	u32 reg_value = (rising <<
+			 pwm_port_params[pwm_port].duty_ctrl_rise_point);
+	reg_value |= (falling <<
+		      pwm_port_params[pwm_port].duty_ctrl_fall_point);
+
+	regmap_update_bits(regmap, pwm_port_params[pwm_port].duty_ctrl_reg,
+			   pwm_port_params[pwm_port].duty_ctrl_rise_fall_mask,
+			   reg_value);
+}
+
+static void aspeed_set_tacho_type_enable(struct regmap *regmap, u8 type,
+					 bool enable)
+{
+	regmap_update_bits(regmap, type_params[type].ctrl_reg,
+			   TYPE_CTRL_FAN_TYPE_EN,
+			   enable ? TYPE_CTRL_FAN_TYPE_EN : 0);
+}
+
+static void aspeed_set_tacho_type_values(struct regmap *regmap, u8 type,
+					 u8 mode, u16 unit, u8 division)
+{
+	u32 reg_value = ((mode << TYPE_CTRL_FAN_MODE) |
+			 (unit << TYPE_CTRL_FAN_PERIOD) |
+			 (division << TYPE_CTRL_FAN_DIVISION));
+
+	regmap_update_bits(regmap, type_params[type].ctrl_reg,
+			   TYPE_CTRL_FAN_MASK, reg_value);
+	regmap_update_bits(regmap, type_params[type].ctrl_reg1,
+			   TYPE_CTRL_FAN1_MASK, unit << 16);
+}
+
+static void aspeed_set_fan_tach_ch_enable(struct regmap *regmap, u8 fan_tach_ch,
+					  bool enable)
+{
+	regmap_update_bits(regmap, ASPEED_PTCR_CTRL,
+			   ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch),
+			   enable ?
+			   ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch) : 0);
+}
+
+static void aspeed_set_fan_tach_ch_source(struct regmap *regmap, u8 fan_tach_ch,
+					  u8 fan_tach_ch_source)
+{
+	u32 reg_value1 = ((fan_tach_ch_source & 0x3) <<
+			  TACH_PWM_SOURCE_BIT01(fan_tach_ch));
+	u32 reg_value2 = (((fan_tach_ch_source & 0x4) >> 2) <<
+			  TACH_PWM_SOURCE_BIT2(fan_tach_ch));
+
+	regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE,
+			   TACH_PWM_SOURCE_MASK_BIT01(fan_tach_ch),
+			   reg_value1);
+
+	regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE_EXT,
+			   TACH_PWM_SOURCE_MASK_BIT2(fan_tach_ch),
+			   reg_value2);
+}
+
+static void aspeed_set_pwm_port_fan_ctrl(struct aspeed_pwm_tacho_data *priv,
+					 u8 index, u8 fan_ctrl)
+{
+	u16 period, dc_time_on;
+
+	period = priv->type_pwm_clock_unit[priv->pwm_port_type[index]];
+	period += 1;
+	dc_time_on = (fan_ctrl * period) / PWM_MAX;
+
+	if (dc_time_on == 0) {
+		aspeed_set_pwm_port_enable(priv->regmap, index, false);
+	} else {
+		if (dc_time_on == period)
+			dc_time_on = 0;
+
+		aspeed_set_pwm_port_duty_rising_falling(priv->regmap, index, 0,
+							dc_time_on);
+		aspeed_set_pwm_port_enable(priv->regmap, index, true);
+	}
+}
+
+static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data
+						 *priv, u8 type)
+{
+	u32 clk;
+	u16 tacho_unit;
+	u8 clk_unit, div_h, div_l, tacho_div;
+
+	clk = priv->clk_freq;
+	clk_unit = priv->type_pwm_clock_unit[type];
+	div_h = priv->type_pwm_clock_division_h[type];
+	div_h = 0x1 << div_h;
+	div_l = priv->type_pwm_clock_division_l[type];
+	if (div_l == 0)
+		div_l = 1;
+	else
+		div_l = div_l * 2;
+
+	tacho_unit = priv->type_fan_tach_unit[type];
+	tacho_div = priv->type_fan_tach_clock_division[type];
+
+	tacho_div = 0x4 << (tacho_div * 2);
+	return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit);
+}
+
+static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
+				      u8 fan_tach_ch)
+{
+	u32 raw_data, tach_div, clk_source, sec, val;
+	u8 fan_tach_ch_source, type;
+
+	regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0);
+	regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch);
+
+	fan_tach_ch_source = priv->fan_tach_ch_source[fan_tach_ch];
+	type = priv->pwm_port_type[fan_tach_ch_source];
+
+	sec = (1000 / aspeed_get_fan_tach_ch_measure_period(priv, type));
+	msleep(sec);
+
+	regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val);
+	raw_data = val & RESULT_VALUE_MASK;
+	tach_div = priv->type_fan_tach_clock_division[type];
+	tach_div = 0x4 << (tach_div * 2);
+	clk_source = priv->clk_freq;
+
+	if (raw_data == 0)
+		return 0;
+
+	return (clk_source * 60) / (2 * raw_data * tach_div);
+}
+
+static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int index = sensor_attr->index;
+	int ret;
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+	long fan_ctrl;
+
+	ret = kstrtol(buf, 10, &fan_ctrl);
+	if (ret != 0)
+		return ret;
+
+	if (fan_ctrl < 0 || fan_ctrl > PWM_MAX)
+		return -EINVAL;
+
+	if (priv->pwm_port_fan_ctrl[index] == fan_ctrl)
+		return count;
+
+	priv->pwm_port_fan_ctrl[index] = fan_ctrl;
+	aspeed_set_pwm_port_fan_ctrl(priv, index, fan_ctrl);
+
+	return count;
+}
+
+static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int index = sensor_attr->index;
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", priv->pwm_port_fan_ctrl[index]);
+}
+
+static ssize_t show_rpm(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int index = sensor_attr->index;
+	u32 rpm;
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+	rpm = aspeed_get_fan_tach_ch_rpm(priv, index);
+
+	return sprintf(buf, "%u\n", rpm);
+}
+
+static umode_t pwm_is_visible(struct kobject *kobj,
+			      struct attribute *a, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+	if (!priv->pwm_present[index])
+		return 0;
+	return a->mode;
+}
+
+static umode_t fan_dev_is_visible(struct kobject *kobj,
+				  struct attribute *a, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+
+	if (!priv->fan_tach_present[index])
+		return 0;
+	return a->mode;
+}
+
+static SENSOR_DEVICE_ATTR(pwm0, 0644,
+			show_pwm, set_pwm, 0);
+static SENSOR_DEVICE_ATTR(pwm1, 0644,
+			show_pwm, set_pwm, 1);
+static SENSOR_DEVICE_ATTR(pwm2, 0644,
+			show_pwm, set_pwm, 2);
+static SENSOR_DEVICE_ATTR(pwm3, 0644,
+			show_pwm, set_pwm, 3);
+static SENSOR_DEVICE_ATTR(pwm4, 0644,
+			show_pwm, set_pwm, 4);
+static SENSOR_DEVICE_ATTR(pwm5, 0644,
+			show_pwm, set_pwm, 5);
+static SENSOR_DEVICE_ATTR(pwm6, 0644,
+			show_pwm, set_pwm, 6);
+static SENSOR_DEVICE_ATTR(pwm7, 0644,
+			show_pwm, set_pwm, 7);
+static struct attribute *pwm_dev_attrs[] = {
+	&sensor_dev_attr_pwm0.dev_attr.attr,
+	&sensor_dev_attr_pwm1.dev_attr.attr,
+	&sensor_dev_attr_pwm2.dev_attr.attr,
+	&sensor_dev_attr_pwm3.dev_attr.attr,
+	&sensor_dev_attr_pwm4.dev_attr.attr,
+	&sensor_dev_attr_pwm5.dev_attr.attr,
+	&sensor_dev_attr_pwm6.dev_attr.attr,
+	&sensor_dev_attr_pwm7.dev_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group pwm_dev_group = {
+	.attrs = pwm_dev_attrs,
+	.is_visible = pwm_is_visible,
+};
+
+static SENSOR_DEVICE_ATTR(fan0_input, 0444,
+		show_rpm, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_input, 0444,
+		show_rpm, NULL, 1);
+static SENSOR_DEVICE_ATTR(fan2_input, 0444,
+		show_rpm, NULL, 2);
+static SENSOR_DEVICE_ATTR(fan3_input, 0444,
+		show_rpm, NULL, 3);
+static SENSOR_DEVICE_ATTR(fan4_input, 0444,
+		show_rpm, NULL, 4);
+static SENSOR_DEVICE_ATTR(fan5_input, 0444,
+		show_rpm, NULL, 5);
+static SENSOR_DEVICE_ATTR(fan6_input, 0444,
+		show_rpm, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan7_input, 0444,
+		show_rpm, NULL, 7);
+static SENSOR_DEVICE_ATTR(fan8_input, 0444,
+		show_rpm, NULL, 8);
+static SENSOR_DEVICE_ATTR(fan9_input, 0444,
+		show_rpm, NULL, 9);
+static SENSOR_DEVICE_ATTR(fan10_input, 0444,
+		show_rpm, NULL, 10);
+static SENSOR_DEVICE_ATTR(fan11_input, 0444,
+		show_rpm, NULL, 11);
+static SENSOR_DEVICE_ATTR(fan12_input, 0444,
+		show_rpm, NULL, 12);
+static SENSOR_DEVICE_ATTR(fan13_input, 0444,
+		show_rpm, NULL, 13);
+static SENSOR_DEVICE_ATTR(fan14_input, 0444,
+		show_rpm, NULL, 14);
+static SENSOR_DEVICE_ATTR(fan15_input, 0444,
+		show_rpm, NULL, 15);
+static struct attribute *fan_dev_attrs[] = {
+	&sensor_dev_attr_fan0_input.dev_attr.attr,
+	&sensor_dev_attr_fan1_input.dev_attr.attr,
+	&sensor_dev_attr_fan2_input.dev_attr.attr,
+	&sensor_dev_attr_fan3_input.dev_attr.attr,
+	&sensor_dev_attr_fan4_input.dev_attr.attr,
+	&sensor_dev_attr_fan5_input.dev_attr.attr,
+	&sensor_dev_attr_fan6_input.dev_attr.attr,
+	&sensor_dev_attr_fan7_input.dev_attr.attr,
+	&sensor_dev_attr_fan8_input.dev_attr.attr,
+	&sensor_dev_attr_fan9_input.dev_attr.attr,
+	&sensor_dev_attr_fan10_input.dev_attr.attr,
+	&sensor_dev_attr_fan11_input.dev_attr.attr,
+	&sensor_dev_attr_fan12_input.dev_attr.attr,
+	&sensor_dev_attr_fan13_input.dev_attr.attr,
+	&sensor_dev_attr_fan14_input.dev_attr.attr,
+	&sensor_dev_attr_fan15_input.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group fan_dev_group = {
+	.attrs = fan_dev_attrs,
+	.is_visible = fan_dev_is_visible,
+};
+
+/*
+ * The clock type is type M :
+ * The PWM frequency = 24MHz / (type M clock division L bit *
+ * type M clock division H bit * (type M PWM period bit + 1))
+ */
+static void aspeed_create_type(struct aspeed_pwm_tacho_data *priv)
+{
+	priv->type_pwm_clock_division_h[TYPEM] = M_PWM_DIV_H;
+	priv->type_pwm_clock_division_l[TYPEM] = M_PWM_DIV_L;
+	priv->type_pwm_clock_unit[TYPEM] = M_PWM_PERIOD;
+	aspeed_set_pwm_clock_values(priv->regmap, TYPEM, M_PWM_DIV_H,
+				    M_PWM_DIV_L, M_PWM_PERIOD);
+	aspeed_set_tacho_type_enable(priv->regmap, TYPEM, true);
+	priv->type_fan_tach_clock_division[TYPEM] = M_TACH_CLK_DIV;
+	priv->type_fan_tach_unit[TYPEM] = M_TACH_UNIT;
+	aspeed_set_tacho_type_values(priv->regmap, TYPEM, M_TACH_MODE,
+				     M_TACH_UNIT, M_TACH_CLK_DIV);
+}
+
+static void aspeed_create_pwm_port(struct aspeed_pwm_tacho_data *priv,
+				   u8 pwm_port)
+{
+	aspeed_set_pwm_port_enable(priv->regmap, pwm_port, true);
+	priv->pwm_present[pwm_port] = true;
+
+	priv->pwm_port_type[pwm_port] = TYPEM;
+	aspeed_set_pwm_port_type(priv->regmap, pwm_port, TYPEM);
+
+	priv->pwm_port_fan_ctrl[pwm_port] = INIT_FAN_CTRL;
+	aspeed_set_pwm_port_fan_ctrl(priv, pwm_port, INIT_FAN_CTRL);
+}
+
+static void aspeed_create_fan_tach_channel(struct aspeed_pwm_tacho_data *priv,
+					   u8 *fan_tach_ch,
+					   int count,
+					   u8 pwm_source)
+{
+	u8 val, index;
+
+	for (val = 0; val < count; val++) {
+		index = fan_tach_ch[val];
+		aspeed_set_fan_tach_ch_enable(priv->regmap, index, true);
+		priv->fan_tach_present[index] = true;
+		priv->fan_tach_ch_source[index] = pwm_source;
+		aspeed_set_fan_tach_ch_source(priv->regmap, index, pwm_source);
+	}
+}
+
+static int aspeed_create_fan(struct device *dev,
+			     struct device_node *child,
+			     struct aspeed_pwm_tacho_data *priv)
+{
+	u8 *fan_tach_ch;
+	u32 pwm_port;
+	int ret, count;
+
+	ret = of_property_read_u32(child, "reg", &pwm_port);
+	if (ret)
+		return ret;
+	aspeed_create_pwm_port(priv, (u8)pwm_port);
+
+	count = of_property_count_u8_elems(child, "aspeed,fan-tach-ch");
+	if (count < 1)
+		return -EINVAL;
+	fan_tach_ch = devm_kzalloc(dev, sizeof(*fan_tach_ch) * count,
+				   GFP_KERNEL);
+	if (!fan_tach_ch)
+		return -ENOMEM;
+	ret = of_property_read_u8_array(child, "aspeed,fan-tach-ch",
+					fan_tach_ch, count);
+	if (ret)
+		return ret;
+	aspeed_create_fan_tach_channel(priv, fan_tach_ch, count, pwm_port);
+
+	return 0;
+}
+
+static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np, *child;
+	struct aspeed_pwm_tacho_data *priv;
+	void __iomem *regs;
+	struct resource *res;
+	struct device *hwmon;
+	struct clk *clk;
+	int ret;
+
+	np = dev->of_node;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOENT;
+	regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
+			&aspeed_pwm_tacho_regmap_config);
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
+	regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE, 0);
+	regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE_EXT, 0);
+
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk))
+		return -ENODEV;
+	priv->clk_freq = clk_get_rate(clk);
+	aspeed_set_clock_enable(priv->regmap, true);
+	aspeed_set_clock_source(priv->regmap, 0);
+
+	aspeed_create_type(priv);
+
+	for_each_child_of_node(np, child) {
+		ret = aspeed_create_fan(dev, child, priv);
+		of_node_put(child);
+		if (ret)
+			return ret;
+	}
+	of_node_put(np);
+
+	priv->groups[0] = &pwm_dev_group;
+	priv->groups[1] = &fan_dev_group;
+	priv->groups[2] = NULL;
+	hwmon = devm_hwmon_device_register_with_groups(dev,
+						       "aspeed_pwm_tacho",
+						       priv, priv->groups);
+	return PTR_ERR_OR_ZERO(hwmon);
+}
+
+static const struct of_device_id of_pwm_tacho_match_table[] = {
+	{ .compatible = "aspeed,ast2400-pwm-tacho", },
+	{ .compatible = "aspeed,ast2500-pwm-tacho", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_pwm_tacho_match_table);
+
+static struct platform_driver aspeed_pwm_tacho_driver = {
+	.probe		= aspeed_pwm_tacho_probe,
+	.driver		= {
+		.name	= "aspeed_pwm_tacho",
+		.of_match_table = of_pwm_tacho_match_table,
+	},
+};
+
+module_platform_driver(aspeed_pwm_tacho_driver);
+
+MODULE_AUTHOR("Jaghathiswari Rankappagounder Natarajan <jaghu@google.com>");
+MODULE_DESCRIPTION("ASPEED PWM and Fan Tacho device driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 34704b0451b4..3189246302a6 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -995,6 +995,13 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 		},
 		.driver_data = (void *)&i8k_config_data[DELL_XPS],
 	},
+	{
+		.ident = "Dell XPS 15 9560",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9560"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 28375d59cc36..dd6e17c1076b 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -186,7 +186,7 @@ static ssize_t hwmon_attr_show_string(struct device *dev,
 				      char *buf)
 {
 	struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr);
-	char *s;
+	const char *s;
 	int ret;
 
 	ret = hattr->ops->read_string(dev, hattr->type, hattr->attr,
diff --git a/drivers/hwmon/ina209.c b/drivers/hwmon/ina209.c
index 5378fdefc1f7..aa0768ce8aea 100644
--- a/drivers/hwmon/ina209.c
+++ b/drivers/hwmon/ina209.c
@@ -117,7 +117,7 @@ static long ina209_from_reg(const u8 reg, const u16 val)
 	case INA209_SHUNT_VOLTAGE_POS_WARN:
 	case INA209_SHUNT_VOLTAGE_NEG_WARN:
 		/* LSB=10 uV. Convert to mV. */
-		return DIV_ROUND_CLOSEST(val, 100);
+		return DIV_ROUND_CLOSEST((s16)val, 100);
 
 	case INA209_BUS_VOLTAGE:
 	case INA209_BUS_VOLTAGE_MAX_PEAK:
@@ -146,7 +146,7 @@ static long ina209_from_reg(const u8 reg, const u16 val)
 
 	case INA209_CURRENT:
 		/* LSB=1 mA (selected). Is in mA */
-		return val;
+		return (s16)val;
 	}
 
 	/* programmer goofed */
@@ -608,11 +608,18 @@ static const struct i2c_device_id ina209_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ina209_id);
 
+static const struct of_device_id ina209_of_match[] = {
+	{ .compatible = "ti,ina209" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ina209_of_match);
+
 /* This is the driver that will be inserted */
 static struct i2c_driver ina209_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "ina209",
+		.of_match_table = of_match_ptr(ina209_of_match),
 	},
 	.probe		= ina209_probe,
 	.remove		= ina209_remove,
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index b24f1d3045f0..62e38fa8cda2 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -34,6 +34,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/jiffies.h>
+#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/delay.h>
 #include <linux/util_macros.h>
@@ -424,13 +425,19 @@ static int ina2xx_probe(struct i2c_client *client,
 	struct device *hwmon_dev;
 	u32 val;
 	int ret, group = 0;
+	enum ina2xx_ids chip;
+
+	if (client->dev.of_node)
+		chip = (enum ina2xx_ids)of_device_get_match_data(&client->dev);
+	else
+		chip = id->driver_data;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
 	/* set the device type */
-	data->config = &ina2xx_config[id->driver_data];
+	data->config = &ina2xx_config[chip];
 
 	if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) {
 		struct ina2xx_platform_data *pdata = dev_get_platdata(dev);
@@ -487,9 +494,35 @@ static const struct i2c_device_id ina2xx_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ina2xx_id);
 
+static const struct of_device_id ina2xx_of_match[] = {
+	{
+		.compatible = "ti,ina219",
+		.data = (void *)ina219
+	},
+	{
+		.compatible = "ti,ina220",
+		.data = (void *)ina219
+	},
+	{
+		.compatible = "ti,ina226",
+		.data = (void *)ina226
+	},
+	{
+		.compatible = "ti,ina230",
+		.data = (void *)ina226
+	},
+	{
+		.compatible = "ti,ina231",
+		.data = (void *)ina226
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ina2xx_of_match);
+
 static struct i2c_driver ina2xx_driver = {
 	.driver = {
 		.name	= "ina2xx",
+		.of_match_table = of_match_ptr(ina2xx_of_match),
 	},
 	.probe		= ina2xx_probe,
 	.id_table	= ina2xx_id,
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 2e1948699114..4c1770920d29 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -46,6 +46,7 @@
 #include <linux/hwmon.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
@@ -1115,6 +1116,10 @@ static int lm63_probe(struct i2c_client *client,
 	mutex_init(&data->update_lock);
 
 	/* Set the device type */
+	if (client->dev.of_node)
+		data->kind = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		data->kind = id->driver_data;
 	data->kind = id->driver_data;
 	if (data->kind == lm64)
 		data->temp2_offset = 16000;
@@ -1149,10 +1154,28 @@ static const struct i2c_device_id lm63_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm63_id);
 
+static const struct of_device_id lm63_of_match[] = {
+	{
+		.compatible = "national,lm63",
+		.data = (void *)lm63
+	},
+	{
+		.compatible = "national,lm64",
+		.data = (void *)lm64
+	},
+	{
+		.compatible = "national,lm96163",
+		.data = (void *)lm96163
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm63_of_match);
+
 static struct i2c_driver lm63_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm63",
+		.of_match_table = of_match_ptr(lm63_of_match),
 	},
 	.probe		= lm63_probe,
 	.id_table	= lm63_id,
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index eff3b24d8473..005ffb5ffa92 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -26,6 +26,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
+#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/regmap.h>
 #include "lm75.h"
@@ -273,7 +274,12 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	int status, err;
 	u8 set_mask, clr_mask;
 	int new;
-	enum lm75_type kind = id->driver_data;
+	enum lm75_type kind;
+
+	if (client->dev.of_node)
+		kind = (enum lm75_type)of_device_get_match_data(&client->dev);
+	else
+		kind = id->driver_data;
 
 	if (!i2c_check_functionality(client->adapter,
 			I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA))
@@ -424,6 +430,95 @@ static const struct i2c_device_id lm75_ids[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm75_ids);
 
+static const struct of_device_id lm75_of_match[] = {
+	{
+		.compatible = "adi,adt75",
+		.data = (void *)adt75
+	},
+	{
+		.compatible = "dallas,ds1775",
+		.data = (void *)ds1775
+	},
+	{
+		.compatible = "dallas,ds75",
+		.data = (void *)ds75
+	},
+	{
+		.compatible = "dallas,ds7505",
+		.data = (void *)ds7505
+	},
+	{
+		.compatible = "gmt,g751",
+		.data = (void *)g751
+	},
+	{
+		.compatible = "national,lm75",
+		.data = (void *)lm75
+	},
+	{
+		.compatible = "national,lm75a",
+		.data = (void *)lm75a
+	},
+	{
+		.compatible = "national,lm75b",
+		.data = (void *)lm75b
+	},
+	{
+		.compatible = "maxim,max6625",
+		.data = (void *)max6625
+	},
+	{
+		.compatible = "maxim,max6626",
+		.data = (void *)max6626
+	},
+	{
+		.compatible = "maxim,mcp980x",
+		.data = (void *)mcp980x
+	},
+	{
+		.compatible = "st,stds75",
+		.data = (void *)stds75
+	},
+	{
+		.compatible = "microchip,tcn75",
+		.data = (void *)tcn75
+	},
+	{
+		.compatible = "ti,tmp100",
+		.data = (void *)tmp100
+	},
+	{
+		.compatible = "ti,tmp101",
+		.data = (void *)tmp101
+	},
+	{
+		.compatible = "ti,tmp105",
+		.data = (void *)tmp105
+	},
+	{
+		.compatible = "ti,tmp112",
+		.data = (void *)tmp112
+	},
+	{
+		.compatible = "ti,tmp175",
+		.data = (void *)tmp175
+	},
+	{
+		.compatible = "ti,tmp275",
+		.data = (void *)tmp275
+	},
+	{
+		.compatible = "ti,tmp75",
+		.data = (void *)tmp75
+	},
+	{
+		.compatible = "ti,tmp75c",
+		.data = (void *)tmp75c
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm75_of_match);
+
 #define LM75A_ID 0xA1
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
@@ -560,6 +655,7 @@ static struct i2c_driver lm75_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm75",
+		.of_match_table = of_match_ptr(lm75_of_match),
 		.pm	= LM75_DEV_PM_OPS,
 	},
 	.probe		= lm75_probe,
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 691469ffa24e..0a325878e8f5 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
@@ -1552,7 +1553,10 @@ static int lm85_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return -ENOMEM;
 
 	data->client = client;
-	data->type = id->driver_data;
+	if (client->dev.of_node)
+		data->type = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		data->type = id->driver_data;
 	mutex_init(&data->update_lock);
 
 	/* Fill in the chip specific driver values */
@@ -1623,10 +1627,60 @@ static const struct i2c_device_id lm85_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm85_id);
 
+static const struct of_device_id lm85_of_match[] = {
+	{
+		.compatible = "adi,adm1027",
+		.data = (void *)adm1027
+	},
+	{
+		.compatible = "adi,adt7463",
+		.data = (void *)adt7463
+	},
+	{
+		.compatible = "adi,adt7468",
+		.data = (void *)adt7468
+	},
+	{
+		.compatible = "national,lm85",
+		.data = (void *)lm85
+	},
+	{
+		.compatible = "national,lm85b",
+		.data = (void *)lm85
+	},
+	{
+		.compatible = "national,lm85c",
+		.data = (void *)lm85
+	},
+	{
+		.compatible = "smsc,emc6d100",
+		.data = (void *)emc6d100
+	},
+	{
+		.compatible = "smsc,emc6d101",
+		.data = (void *)emc6d100
+	},
+	{
+		.compatible = "smsc,emc6d102",
+		.data = (void *)emc6d102
+	},
+	{
+		.compatible = "smsc,emc6d103",
+		.data = (void *)emc6d103
+	},
+	{
+		.compatible = "smsc,emc6d103s",
+		.data = (void *)emc6d103s
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm85_of_match);
+
 static struct i2c_driver lm85_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name   = "lm85",
+		.of_match_table = of_match_ptr(lm85_of_match),
 	},
 	.probe		= lm85_probe,
 	.id_table	= lm85_id,
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index e06faf9d3f0f..b48d30760388 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -66,6 +66,7 @@
 #include <linux/hwmon-vid.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/regulator/consumer.h>
 
 /*
  * Addresses to scan
@@ -74,8 +75,6 @@
 
 static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
-enum chips { lm87, adm1024 };
-
 /*
  * The LM87 registers
  */
@@ -855,8 +854,26 @@ static int lm87_init_client(struct i2c_client *client)
 {
 	struct lm87_data *data = i2c_get_clientdata(client);
 	int rc;
-
-	if (dev_get_platdata(&client->dev)) {
+	struct device_node *of_node = client->dev.of_node;
+	u8 val = 0;
+	struct regulator *vcc = NULL;
+
+	if (of_node) {
+		if (of_property_read_bool(of_node, "has-temp3"))
+			val |= CHAN_TEMP3;
+		if (of_property_read_bool(of_node, "has-in6"))
+			val |= CHAN_NO_FAN(0);
+		if (of_property_read_bool(of_node, "has-in7"))
+			val |= CHAN_NO_FAN(1);
+		vcc = devm_regulator_get_optional(&client->dev, "vcc");
+		if (!IS_ERR(vcc)) {
+			if (regulator_get_voltage(vcc) == 5000000)
+				val |= CHAN_VCC_5V;
+		}
+		data->channel = val;
+		lm87_write_value(client,
+				LM87_REG_CHANNEL_MODE, data->channel);
+	} else if (dev_get_platdata(&client->dev)) {
 		data->channel = *(u8 *)dev_get_platdata(&client->dev);
 		lm87_write_value(client,
 				 LM87_REG_CHANNEL_MODE, data->channel);
@@ -962,16 +979,24 @@ static int lm87_probe(struct i2c_client *client, const struct i2c_device_id *id)
  */
 
 static const struct i2c_device_id lm87_id[] = {
-	{ "lm87", lm87 },
-	{ "adm1024", adm1024 },
+	{ "lm87", 0 },
+	{ "adm1024", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm87_id);
 
+static const struct of_device_id lm87_of_match[] = {
+	{ .compatible = "ti,lm87" },
+	{ .compatible = "adi,adm1024" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm87_of_match);
+
 static struct i2c_driver lm87_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm87",
+		.of_match_table = lm87_of_match,
 	},
 	.probe		= lm87_probe,
 	.id_table	= lm87_id,
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index aff5297bc2bc..c2f411c290bf 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -92,6 +92,7 @@
 #include <linux/hwmon.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/sysfs.h>
 #include <linux/interrupt.h>
 #include <linux/regulator/consumer.h>
@@ -235,6 +236,99 @@ static const struct i2c_device_id lm90_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm90_id);
 
+static const struct of_device_id lm90_of_match[] = {
+	{
+		.compatible = "adi,adm1032",
+		.data = (void *)adm1032
+	},
+	{
+		.compatible = "adi,adt7461",
+		.data = (void *)adt7461
+	},
+	{
+		.compatible = "adi,adt7461a",
+		.data = (void *)adt7461
+	},
+	{
+		.compatible = "gmt,g781",
+		.data = (void *)g781
+	},
+	{
+		.compatible = "national,lm90",
+		.data = (void *)lm90
+	},
+	{
+		.compatible = "national,lm86",
+		.data = (void *)lm86
+	},
+	{
+		.compatible = "national,lm89",
+		.data = (void *)lm86
+	},
+	{
+		.compatible = "national,lm99",
+		.data = (void *)lm99
+	},
+	{
+		.compatible = "dallas,max6646",
+		.data = (void *)max6646
+	},
+	{
+		.compatible = "dallas,max6647",
+		.data = (void *)max6646
+	},
+	{
+		.compatible = "dallas,max6649",
+		.data = (void *)max6646
+	},
+	{
+		.compatible = "dallas,max6657",
+		.data = (void *)max6657
+	},
+	{
+		.compatible = "dallas,max6658",
+		.data = (void *)max6657
+	},
+	{
+		.compatible = "dallas,max6659",
+		.data = (void *)max6659
+	},
+	{
+		.compatible = "dallas,max6680",
+		.data = (void *)max6680
+	},
+	{
+		.compatible = "dallas,max6681",
+		.data = (void *)max6680
+	},
+	{
+		.compatible = "dallas,max6695",
+		.data = (void *)max6696
+	},
+	{
+		.compatible = "dallas,max6696",
+		.data = (void *)max6696
+	},
+	{
+		.compatible = "onnn,nct1008",
+		.data = (void *)adt7461
+	},
+	{
+		.compatible = "winbond,w83l771",
+		.data = (void *)w83l771
+	},
+	{
+		.compatible = "nxp,sa56004",
+		.data = (void *)sa56004
+	},
+	{
+		.compatible = "ti,tmp451",
+		.data = (void *)tmp451
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm90_of_match);
+
 /*
  * chip type specific parameters
  */
@@ -1677,7 +1771,10 @@ static int lm90_probe(struct i2c_client *client,
 	mutex_init(&data->update_lock);
 
 	/* Set the device type */
-	data->kind = id->driver_data;
+	if (client->dev.of_node)
+		data->kind = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		data->kind = id->driver_data;
 	if (data->kind == adm1032) {
 		if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
 			client->flags &= ~I2C_CLIENT_PEC;
@@ -1816,6 +1913,7 @@ static struct i2c_driver lm90_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm90",
+		.of_match_table = of_match_ptr(lm90_of_match),
 	},
 	.probe		= lm90_probe,
 	.alert		= lm90_alert,
diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c
index a3bfd88752ca..27cb06d65594 100644
--- a/drivers/hwmon/lm95245.c
+++ b/drivers/hwmon/lm95245.c
@@ -622,10 +622,18 @@ static const struct i2c_device_id lm95245_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm95245_id);
 
+static const struct of_device_id lm95245_of_match[] = {
+	{ .compatible = "national,lm95235" },
+	{ .compatible = "national,lm95245" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm95245_of_match);
+
 static struct i2c_driver lm95245_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm95245",
+		.of_match_table = of_match_ptr(lm95245_of_match),
 	},
 	.probe		= lm95245_probe,
 	.id_table	= lm95245_id,
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
index f03a71722849..221fd1492057 100644
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -24,6 +24,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/of.h>
 
 #include <linux/platform_data/max6697.h>
@@ -632,7 +633,10 @@ static int max6697_probe(struct i2c_client *client,
 	if (!data)
 		return -ENOMEM;
 
-	data->type = id->driver_data;
+	if (client->dev.of_node)
+		data->type = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		data->type = id->driver_data;
 	data->chip = &max6697_chip_data[data->type];
 	data->client = client;
 	mutex_init(&data->update_lock);
@@ -662,10 +666,56 @@ static const struct i2c_device_id max6697_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max6697_id);
 
+static const struct of_device_id max6697_of_match[] = {
+	{
+		.compatible = "maxim,max6581",
+		.data = (void *)max6581
+	},
+	{
+		.compatible = "maxim,max6602",
+		.data = (void *)max6602
+	},
+	{
+		.compatible = "maxim,max6622",
+		.data = (void *)max6622
+	},
+	{
+		.compatible = "maxim,max6636",
+		.data = (void *)max6636
+	},
+	{
+		.compatible = "maxim,max6689",
+		.data = (void *)max6689
+	},
+	{
+		.compatible = "maxim,max6693",
+		.data = (void *)max6693
+	},
+	{
+		.compatible = "maxim,max6694",
+		.data = (void *)max6694
+	},
+	{
+		.compatible = "maxim,max6697",
+		.data = (void *)max6697
+	},
+	{
+		.compatible = "maxim,max6698",
+		.data = (void *)max6698
+	},
+	{
+		.compatible = "maxim,max6699",
+		.data = (void *)max6699
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, max6697_of_match);
+
 static struct i2c_driver max6697_driver = {
 	.class = I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "max6697",
+		.of_match_table = of_match_ptr(max6697_of_match),
 	},
 	.probe = max6697_probe,
 	.id_table = max6697_id,
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index 4ab5293c7bf0..00d6995af4c2 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -101,8 +101,8 @@ static const struct coefficients adm1075_coefficients[] = {
 	[0] = { 27169, 0, -1 },		/* voltage */
 	[1] = { 806, 20475, -1 },	/* current, irange25 */
 	[2] = { 404, 20475, -1 },	/* current, irange50 */
-	[3] = { 0, -1, 8549 },		/* power, irange25 */
-	[4] = { 0, -1, 4279 },		/* power, irange50 */
+	[3] = { 8549, 0, -1 },		/* power, irange25 */
+	[4] = { 4279, 0, -1 },		/* power, irange50 */
 };
 
 static const struct coefficients adm1275_coefficients[] = {
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index 3e3aa950277f..3518f0c08934 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -21,6 +21,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
@@ -119,6 +120,35 @@ static const struct i2c_device_id ucd9000_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ucd9000_id);
 
+static const struct of_device_id ucd9000_of_match[] = {
+	{
+		.compatible = "ti,ucd9000",
+		.data = (void *)ucd9000
+	},
+	{
+		.compatible = "ti,ucd90120",
+		.data = (void *)ucd90120
+	},
+	{
+		.compatible = "ti,ucd90124",
+		.data = (void *)ucd90124
+	},
+	{
+		.compatible = "ti,ucd90160",
+		.data = (void *)ucd90160
+	},
+	{
+		.compatible = "ti,ucd9090",
+		.data = (void *)ucd9090
+	},
+	{
+		.compatible = "ti,ucd90910",
+		.data = (void *)ucd90910
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ucd9000_of_match);
+
 static int ucd9000_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -126,6 +156,7 @@ static int ucd9000_probe(struct i2c_client *client,
 	struct ucd9000_data *data;
 	struct pmbus_driver_info *info;
 	const struct i2c_device_id *mid;
+	enum chips chip;
 	int i, ret;
 
 	if (!i2c_check_functionality(client->adapter,
@@ -151,7 +182,12 @@ static int ucd9000_probe(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	if (id->driver_data != ucd9000 && id->driver_data != mid->driver_data)
+	if (client->dev.of_node)
+		chip = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		chip = id->driver_data;
+
+	if (chip != ucd9000 && chip != mid->driver_data)
 		dev_notice(&client->dev,
 			   "Device mismatch: Configured %s, detected %s\n",
 			   id->name, mid->name);
@@ -234,6 +270,7 @@ static int ucd9000_probe(struct i2c_client *client,
 static struct i2c_driver ucd9000_driver = {
 	.driver = {
 		.name = "ucd9000",
+		.of_match_table = of_match_ptr(ucd9000_of_match),
 	},
 	.probe = ucd9000_probe,
 	.remove = pmbus_do_remove,
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c
index 033d6aca47d3..a8712c5ded4e 100644
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -20,6 +20,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
@@ -46,12 +47,50 @@ static const struct i2c_device_id ucd9200_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ucd9200_id);
 
+static const struct of_device_id ucd9200_of_match[] = {
+	{
+		.compatible = "ti,cd9200",
+		.data = (void *)ucd9200
+	},
+	{
+		.compatible = "ti,cd9220",
+		.data = (void *)ucd9220
+	},
+	{
+		.compatible = "ti,cd9222",
+		.data = (void *)ucd9222
+	},
+	{
+		.compatible = "ti,cd9224",
+		.data = (void *)ucd9224
+	},
+	{
+		.compatible = "ti,cd9240",
+		.data = (void *)ucd9240
+	},
+	{
+		.compatible = "ti,cd9244",
+		.data = (void *)ucd9244
+	},
+	{
+		.compatible = "ti,cd9246",
+		.data = (void *)ucd9246
+	},
+	{
+		.compatible = "ti,cd9248",
+		.data = (void *)ucd9248
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ucd9200_of_match);
+
 static int ucd9200_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
 	u8 block_buffer[I2C_SMBUS_BLOCK_MAX + 1];
 	struct pmbus_driver_info *info;
 	const struct i2c_device_id *mid;
+	enum chips chip;
 	int i, j, ret;
 
 	if (!i2c_check_functionality(client->adapter,
@@ -76,7 +115,13 @@ static int ucd9200_probe(struct i2c_client *client,
 		dev_err(&client->dev, "Unsupported device\n");
 		return -ENODEV;
 	}
-	if (id->driver_data != ucd9200 && id->driver_data != mid->driver_data)
+
+	if (client->dev.of_node)
+		chip = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		chip = id->driver_data;
+
+	if (chip != ucd9200 && chip != mid->driver_data)
 		dev_notice(&client->dev,
 			   "Device mismatch: Configured %s, detected %s\n",
 			   id->name, mid->name);
@@ -167,6 +212,7 @@ static int ucd9200_probe(struct i2c_client *client,
 static struct i2c_driver ucd9200_driver = {
 	.driver = {
 		.name = "ucd9200",
+		.of_match_table = of_match_ptr(ucd9200_of_match),
 	},
 	.probe = ucd9200_probe,
 	.remove = pmbus_do_remove,
diff --git a/drivers/hwmon/stts751.c b/drivers/hwmon/stts751.c
index 55450680fb58..d56251d6eec2 100644
--- a/drivers/hwmon/stts751.c
+++ b/drivers/hwmon/stts751.c
@@ -85,6 +85,12 @@ static const struct i2c_device_id stts751_id[] = {
 	{ }
 };
 
+static const struct of_device_id stts751_of_match[] = {
+	{ .compatible = "stts751" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, stts751_of_match);
+
 struct stts751_priv {
 	struct device *dev;
 	struct i2c_client *client;
@@ -819,6 +825,7 @@ static struct i2c_driver stts751_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= DEVNAME,
+		.of_match_table = of_match_ptr(stts751_of_match),
 	},
 	.probe		= stts751_probe,
 	.id_table	= stts751_id,
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 36bba2a816a4..5eafbaada795 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -323,8 +323,15 @@ static const struct i2c_device_id tmp102_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, tmp102_id);
 
+static const struct of_device_id tmp102_of_match[] = {
+	{ .compatible = "ti,tmp102" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tmp102_of_match);
+
 static struct i2c_driver tmp102_driver = {
 	.driver.name	= DRIVER_NAME,
+	.driver.of_match_table = of_match_ptr(tmp102_of_match),
 	.driver.pm	= &tmp102_dev_pm_ops,
 	.probe		= tmp102_probe,
 	.id_table	= tmp102_id,
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c
index ad571ec795a3..7f85b14544df 100644
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -150,8 +150,7 @@ static int tmp103_probe(struct i2c_client *client,
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
-#ifdef CONFIG_PM
-static int tmp103_suspend(struct device *dev)
+static int __maybe_unused tmp103_suspend(struct device *dev)
 {
 	struct regmap *regmap = dev_get_drvdata(dev);
 
@@ -159,7 +158,7 @@ static int tmp103_suspend(struct device *dev)
 				  TMP103_CONF_SD_MASK, 0);
 }
 
-static int tmp103_resume(struct device *dev)
+static int __maybe_unused tmp103_resume(struct device *dev)
 {
 	struct regmap *regmap = dev_get_drvdata(dev);
 
@@ -167,15 +166,7 @@ static int tmp103_resume(struct device *dev)
 				  TMP103_CONF_SD_MASK, TMP103_CONF_SD);
 }
 
-static const struct dev_pm_ops tmp103_dev_pm_ops = {
-	.suspend	= tmp103_suspend,
-	.resume		= tmp103_resume,
-};
-
-#define TMP103_DEV_PM_OPS (&tmp103_dev_pm_ops)
-#else
-#define	TMP103_DEV_PM_OPS NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(tmp103_dev_pm_ops, tmp103_suspend, tmp103_resume);
 
 static const struct i2c_device_id tmp103_id[] = {
 	{ "tmp103", 0 },
@@ -183,10 +174,17 @@ static const struct i2c_device_id tmp103_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, tmp103_id);
 
+static const struct of_device_id tmp103_of_match[] = {
+	{ .compatible = "ti,tmp103" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tmp103_of_match);
+
 static struct i2c_driver tmp103_driver = {
 	.driver = {
 		.name	= "tmp103",
-		.pm	= TMP103_DEV_PM_OPS,
+		.of_match_table = of_match_ptr(tmp103_of_match),
+		.pm	= &tmp103_dev_pm_ops,
 	},
 	.probe		= tmp103_probe,
 	.id_table	= tmp103_id,
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index bfb98b96c781..e36399213324 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -29,6 +29,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/sysfs.h>
 
 /* Addresses to scan */
@@ -69,6 +70,31 @@ static const struct i2c_device_id tmp421_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, tmp421_id);
 
+static const struct of_device_id tmp421_of_match[] = {
+	{
+		.compatible = "ti,tmp421",
+		.data = (void *)2
+	},
+	{
+		.compatible = "ti,tmp422",
+		.data = (void *)3
+	},
+	{
+		.compatible = "ti,tmp423",
+		.data = (void *)4
+	},
+	{
+		.compatible = "ti,tmp441",
+		.data = (void *)2
+	},
+	{
+		.compatible = "ti,tmp422",
+		.data = (void *)3
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tmp421_of_match);
+
 struct tmp421_data {
 	struct i2c_client *client;
 	struct mutex update_lock;
@@ -78,7 +104,7 @@ struct tmp421_data {
 	struct hwmon_chip_info chip;
 	char valid;
 	unsigned long last_updated;
-	int channels;
+	unsigned long channels;
 	u8 config;
 	s16 temp[4];
 };
@@ -272,7 +298,11 @@ static int tmp421_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	mutex_init(&data->update_lock);
-	data->channels = id->driver_data;
+	if (client->dev.of_node)
+		data->channels = (unsigned long)
+			of_device_get_match_data(&client->dev);
+	else
+		data->channels = id->driver_data;
 	data->client = client;
 
 	err = tmp421_init_client(client);
@@ -301,6 +331,7 @@ static struct i2c_driver tmp421_driver = {
 	.class = I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "tmp421",
+		.of_match_table = of_match_ptr(tmp421_of_match),
 	},
 	.probe = tmp421_probe,
 	.id_table = tmp421_id,
diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c
deleted file mode 100644
index b5caf7fdb487..000000000000
--- a/drivers/hwmon/twl4030-madc-hwmon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *
- * TWL4030 MADC Hwmon driver-This driver monitors the real time
- * conversion of analog signals like battery temperature,
- * battery type, battery level etc. User can ask for the conversion on a
- * particular channel using the sysfs nodes.
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- * J Keerthy <j-keerthy@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/i2c/twl.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/i2c/twl4030-madc.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
-#include <linux/stddef.h>
-#include <linux/sysfs.h>
-#include <linux/err.h>
-#include <linux/types.h>
-
-/*
- * sysfs hook function
- */
-static ssize_t madc_read(struct device *dev,
-			 struct device_attribute *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct twl4030_madc_request req = {
-		.channels = 1 << attr->index,
-		.method = TWL4030_MADC_SW2,
-		.type = TWL4030_MADC_WAIT,
-	};
-	long val;
-
-	val = twl4030_madc_conversion(&req);
-	if (val < 0)
-		return val;
-
-	return sprintf(buf, "%d\n", req.rbuf[attr->index]);
-}
-
-/* sysfs nodes to read individual channels from user side */
-static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, madc_read, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, madc_read, NULL, 1);
-static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, madc_read, NULL, 2);
-static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, madc_read, NULL, 3);
-static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, madc_read, NULL, 4);
-static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, madc_read, NULL, 5);
-static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, madc_read, NULL, 6);
-static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, madc_read, NULL, 7);
-static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, madc_read, NULL, 8);
-static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, madc_read, NULL, 9);
-static SENSOR_DEVICE_ATTR(curr10_input, S_IRUGO, madc_read, NULL, 10);
-static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, madc_read, NULL, 11);
-static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, madc_read, NULL, 12);
-static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, madc_read, NULL, 15);
-
-static struct attribute *twl4030_madc_attrs[] = {
-	&sensor_dev_attr_in0_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_in2_input.dev_attr.attr,
-	&sensor_dev_attr_in3_input.dev_attr.attr,
-	&sensor_dev_attr_in4_input.dev_attr.attr,
-	&sensor_dev_attr_in5_input.dev_attr.attr,
-	&sensor_dev_attr_in6_input.dev_attr.attr,
-	&sensor_dev_attr_in7_input.dev_attr.attr,
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-	&sensor_dev_attr_curr10_input.dev_attr.attr,
-	&sensor_dev_attr_in11_input.dev_attr.attr,
-	&sensor_dev_attr_in12_input.dev_attr.attr,
-	&sensor_dev_attr_in15_input.dev_attr.attr,
-	NULL
-};
-ATTRIBUTE_GROUPS(twl4030_madc);
-
-static int twl4030_madc_hwmon_probe(struct platform_device *pdev)
-{
-	struct device *hwmon;
-
-	hwmon = devm_hwmon_device_register_with_groups(&pdev->dev,
-						       "twl4030_madc", NULL,
-						       twl4030_madc_groups);
-	return PTR_ERR_OR_ZERO(hwmon);
-}
-
-static struct platform_driver twl4030_madc_hwmon_driver = {
-	.probe = twl4030_madc_hwmon_probe,
-	.driver = {
-		   .name = "twl4030_madc_hwmon",
-		   },
-};
-
-module_platform_driver(twl4030_madc_hwmon_driver);
-
-MODULE_DESCRIPTION("TWL4030 ADC Hwmon driver");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("J Keerthy");
-MODULE_ALIAS("platform:twl4030_madc_hwmon");
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index ab346ed142de..ad68b6d9ff17 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -135,11 +135,16 @@ superio_select(int ioreg, int ld)
 	outb(ld, ioreg + 1);
 }
 
-static inline void
+static inline int
 superio_enter(int ioreg)
 {
+	if (!request_muxed_region(ioreg, 2, DRVNAME))
+		return -EBUSY;
+
 	outb(0x87, ioreg);
 	outb(0x87, ioreg);
+
+	return 0;
 }
 
 static inline void
@@ -148,6 +153,7 @@ superio_exit(int ioreg)
 	outb(0xaa, ioreg);
 	outb(0x02, ioreg);
 	outb(0x02, ioreg + 1);
+	release_region(ioreg, 2);
 }
 
 /*
@@ -1970,8 +1976,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data,
 		return;
 	}
 
-	superio_enter(sio_data->sioreg);
-
 	/* fan4 and fan5 share some pins with the GPIO and serial flash */
 	if (sio_data->kind == nct6775) {
 		/* On NCT6775, fan4 shares pins with the fdc interface */
@@ -2013,8 +2017,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data,
 		fan4min = fan4pin;
 	}
 
-	superio_exit(sio_data->sioreg);
-
 	data->has_fan = data->has_fan_min = 0x03; /* fan1 and fan2 */
 	data->has_fan |= (fan3pin << 2);
 	data->has_fan_min |= (fan3pin << 2);
@@ -2352,7 +2354,11 @@ static int w83627ehf_probe(struct platform_device *pdev)
 	w83627ehf_init_device(data, sio_data->kind);
 
 	data->vrm = vid_which_vrm();
-	superio_enter(sio_data->sioreg);
+
+	err = superio_enter(sio_data->sioreg);
+	if (err)
+		goto exit_release;
+
 	/* Read VID value */
 	if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b ||
 	    sio_data->kind == nct6775 || sio_data->kind == nct6776) {
@@ -2364,8 +2370,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
 		superio_select(sio_data->sioreg, W83667HG_LD_VID);
 		data->vid = superio_inb(sio_data->sioreg, 0xe3);
 		err = device_create_file(dev, &dev_attr_cpu0_vid);
-		if (err)
+		if (err) {
+			superio_exit(sio_data->sioreg);
 			goto exit_release;
+		}
 	} else if (sio_data->kind != w83627uhg) {
 		superio_select(sio_data->sioreg, W83627EHF_LD_HWM);
 		if (superio_inb(sio_data->sioreg, SIO_REG_VID_CTRL) & 0x80) {
@@ -2401,8 +2409,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
 				data->vid &= 0x3f;
 
 			err = device_create_file(dev, &dev_attr_cpu0_vid);
-			if (err)
+			if (err) {
+				superio_exit(sio_data->sioreg);
 				goto exit_release;
+			}
 		} else {
 			dev_info(dev,
 				 "VID pins in output mode, CPU VID not available\n");
@@ -2424,10 +2434,10 @@ static int w83627ehf_probe(struct platform_device *pdev)
 		pr_info("Enabled fan debounce for chip %s\n", data->name);
 	}
 
-	superio_exit(sio_data->sioreg);
-
 	w83627ehf_check_fan_inputs(sio_data, data);
 
+	superio_exit(sio_data->sioreg);
+
 	/* Read fan clock dividers immediately */
 	w83627ehf_update_fan_div_common(dev, data);
 
@@ -2712,8 +2722,11 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr,
 
 	u16 val;
 	const char *sio_name;
+	int err;
 
-	superio_enter(sioaddr);
+	err = superio_enter(sioaddr);
+	if (err)
+		return err;
 
 	if (force_id)
 		val = force_id;
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 79c4b4ea0539..90c7c101b7fd 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -129,6 +129,8 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = {
 	{ "AMDI0010", ACCESS_INTR_MASK },
 	{ "AMDI0510", 0 },
 	{ "APMC0D0F", 0 },
+	{ "HISI02A1", 0 },
+	{ "HISI02A2", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index feb30061123b..5901937284e7 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -107,7 +107,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	memcpy(scsi_req(rq)->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
-	error = blk_execute_rq(drive->queue, disk, rq, 0);
+	blk_execute_rq(drive->queue, disk, rq, 0);
+	error = scsi_req(rq)->result ? -EIO : 0;
 put_req:
 	blk_put_request(rq);
 	return error;
@@ -454,7 +455,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			debug_log("%s: I/O error\n", drive->name);
 
 			if (drive->media != ide_tape)
-				pc->rq->errors++;
+				scsi_req(pc->rq)->result++;
 
 			if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
 				printk(KERN_ERR PFX "%s: I/O error in request "
@@ -488,13 +489,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			drive->failed_pc = NULL;
 
 		if (ata_misc_request(rq)) {
-			rq->errors = 0;
+			scsi_req(rq)->result = 0;
 			error = 0;
 		} else {
 
 			if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
-				if (rq->errors == 0)
-					rq->errors = -EIO;
+				if (scsi_req(rq)->result == 0)
+					scsi_req(rq)->result = -EIO;
 			}
 
 			error = uptodate ? 0 : -EIO;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 74f1b7dc03f7..07e5ff3a64c3 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -247,10 +247,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
 
 	struct cdrom_info *info = drive->driver_data;
 
-	if (!rq->errors)
+	if (!scsi_req(rq)->result)
 		info->write_timeout = jiffies +	ATAPI_WAIT_WRITE_BUSY;
 
-	rq->errors = 1;
+	scsi_req(rq)->result = 1;
 
 	if (time_after(jiffies, info->write_timeout))
 		return 0;
@@ -294,8 +294,8 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 	}
 
 	/* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-	if (blk_rq_is_scsi(rq) && !rq->errors)
-		rq->errors = SAM_STAT_CHECK_CONDITION;
+	if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result)
+		scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION;
 
 	if (blk_noretry_request(rq))
 		do_end_request = 1;
@@ -325,7 +325,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 		 * Arrange to retry the request but be sure to give up if we've
 		 * retried too many times.
 		 */
-		if (++rq->errors > ERROR_MAX)
+		if (++scsi_req(rq)->result > ERROR_MAX)
 			do_end_request = 1;
 		break;
 	case ILLEGAL_REQUEST:
@@ -372,7 +372,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 			/* go to the default handler for other errors */
 			ide_error(drive, "cdrom_decode_status", stat);
 			return 1;
-		} else if (++rq->errors > ERROR_MAX)
+		} else if (++scsi_req(rq)->result > ERROR_MAX)
 			/* we've racked up too many retries, abort */
 			do_end_request = 1;
 	}
@@ -452,7 +452,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 			}
 		}
 
-		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
+		blk_execute_rq(drive->queue, info->disk, rq, 0);
+		error = scsi_req(rq)->result ? -EIO : 0;
 
 		if (buffer)
 			*bufflen = scsi_req(rq)->resid_len;
@@ -683,8 +684,8 @@ out_end:
 			if (cmd->nleft == 0)
 				uptodate = 1;
 		} else {
-			if (uptodate <= 0 && rq->errors == 0)
-				rq->errors = -EIO;
+			if (uptodate <= 0 && scsi_req(rq)->result == 0)
+				scsi_req(rq)->result = -EIO;
 		}
 
 		if (uptodate == 0 && rq->bio)
@@ -1379,7 +1380,7 @@ static int ide_cdrom_prep_pc(struct request *rq)
 	 * appropriate action
 	 */
 	if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
-		rq->errors = ILLEGAL_REQUEST;
+		scsi_req(rq)->result = ILLEGAL_REQUEST;
 		return BLKPREP_KILL;
 	}
 
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 9fcefbc8425e..55cd736c39c6 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -307,7 +307,8 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
-	ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
+	blk_execute_rq(drive->queue, cd->disk, rq, 0);
+	ret = scsi_req(rq)->result ? -EIO : 0;
 	blk_put_request(rq);
 	/*
 	 * A reset will unlock the door. If it was previously locked,
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index a45dda5386e4..9b69c32ee560 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -173,8 +173,8 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 	*(int *)&scsi_req(rq)->cmd[1] = arg;
 	rq->special = setting->set;
 
-	if (blk_execute_rq(q, NULL, rq, 0))
-		ret = rq->errors;
+	blk_execute_rq(q, NULL, rq, 0);
+	ret = scsi_req(rq)->result;
 	blk_put_request(rq);
 
 	return ret;
@@ -186,7 +186,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
 
 	err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
 	if (err)
-		rq->errors = err;
-	ide_complete_rq(drive, err, blk_rq_bytes(rq));
+		scsi_req(rq)->result = err;
+	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
 	return ide_stopped;
 }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 186159715b71..7c06237f3479 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -470,7 +470,6 @@ ide_devset_get(multcount, mult_count);
 static int set_multcount(ide_drive_t *drive, int arg)
 {
 	struct request *rq;
-	int error;
 
 	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
 		return -EINVAL;
@@ -484,7 +483,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 
 	drive->mult_req = arg;
 	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-	error = blk_execute_rq(drive->queue, NULL, rq, 0);
+	blk_execute_rq(drive->queue, NULL, rq, 0);
 	blk_put_request(rq);
 
 	return (drive->mult_count == arg) ? 0 : -EIO;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 17a65ac56491..51c81223e56d 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -490,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 	 * make sure request is sane
 	 */
 	if (hwif->rq)
-		hwif->rq->errors = 0;
+		scsi_req(hwif->rq)->result = 0;
 	return ret;
 }
 
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index cf3af6840368..4b7ffd7d158d 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -12,7 +12,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 	if ((stat & ATA_BUSY) ||
 	    ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
 		/* other bits are useless when BUSY */
-		rq->errors |= ERROR_RESET;
+		scsi_req(rq)->result |= ERROR_RESET;
 	} else if (stat & ATA_ERR) {
 		/* err has different meaning on cdrom and tape */
 		if (err == ATA_ABORTED) {
@@ -25,10 +25,10 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 			drive->crc_count++;
 		} else if (err & (ATA_BBK | ATA_UNC)) {
 			/* retries won't help these */
-			rq->errors = ERROR_MAX;
+			scsi_req(rq)->result = ERROR_MAX;
 		} else if (err & ATA_TRK0NF) {
 			/* help it find track zero */
-			rq->errors |= ERROR_RECAL;
+			scsi_req(rq)->result |= ERROR_RECAL;
 		}
 	}
 
@@ -39,23 +39,23 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 		ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
 	}
 
-	if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
+	if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) {
 		ide_kill_rq(drive, rq);
 		return ide_stopped;
 	}
 
 	if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-		rq->errors |= ERROR_RESET;
+		scsi_req(rq)->result |= ERROR_RESET;
 
-	if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-		++rq->errors;
+	if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+		++scsi_req(rq)->result;
 		return ide_do_reset(drive);
 	}
 
-	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+	if ((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL)
 		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 
-	++rq->errors;
+	++scsi_req(rq)->result;
 
 	return ide_stopped;
 }
@@ -68,7 +68,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
 	if ((stat & ATA_BUSY) ||
 	    ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
 		/* other bits are useless when BUSY */
-		rq->errors |= ERROR_RESET;
+		scsi_req(rq)->result |= ERROR_RESET;
 	} else {
 		/* add decoding error stuff */
 	}
@@ -77,14 +77,14 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
 		/* force an abort */
 		hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
 
-	if (rq->errors >= ERROR_MAX) {
+	if (scsi_req(rq)->result >= ERROR_MAX) {
 		ide_kill_rq(drive, rq);
 	} else {
-		if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-			++rq->errors;
+		if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
+			++scsi_req(rq)->result;
 			return ide_do_reset(drive);
 		}
-		++rq->errors;
+		++scsi_req(rq)->result;
 	}
 
 	return ide_stopped;
@@ -130,11 +130,11 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 			if (cmd)
 				ide_complete_cmd(drive, cmd, stat, err);
 		} else if (ata_pm_request(rq)) {
-			rq->errors = 1;
+			scsi_req(rq)->result = 1;
 			ide_complete_pm_rq(drive, rq);
 			return ide_stopped;
 		}
-		rq->errors = err;
+		scsi_req(rq)->result = err;
 		ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
@@ -149,8 +149,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 
 	if (rq && ata_misc_request(rq) &&
 	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
-		if (err <= 0 && rq->errors == 0)
-			rq->errors = -EIO;
+		if (err <= 0 && scsi_req(rq)->result == 0)
+			scsi_req(rq)->result = -EIO;
 		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
 	}
 }
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index a69e8013f1df..8ac6048cd2df 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
 	}
 
 	if (ata_misc_request(rq))
-		rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
+		scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
 
 	return uptodate;
 }
@@ -239,7 +239,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 					? rq->rq_disk->disk_name
 					: "dev?"));
 
-	if (rq->errors >= ERROR_MAX) {
+	if (scsi_req(rq)->result >= ERROR_MAX) {
 		if (drive->failed_pc) {
 			ide_floppy_report_error(floppy, drive->failed_pc);
 			drive->failed_pc = NULL;
@@ -247,7 +247,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
 		if (ata_misc_request(rq)) {
-			rq->errors = 0;
+			scsi_req(rq)->result = 0;
 			ide_complete_rq(drive, 0, blk_rq_bytes(rq));
 			return ide_stopped;
 		} else
@@ -301,8 +301,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 	return ide_floppy_issue_pc(drive, &cmd, pc);
 out_end:
 	drive->failed_pc = NULL;
-	if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-		rq->errors = -EIO;
+	if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+		scsi_req(rq)->result = -EIO;
 	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
 	return ide_stopped;
 }
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 043b1fb963cb..45b3f41a43d4 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -141,12 +141,12 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 	drive->failed_pc = NULL;
 
 	if ((media == ide_floppy || media == ide_tape) && drv_req) {
-		rq->errors = 0;
+		scsi_req(rq)->result = 0;
 	} else {
 		if (media == ide_tape)
-			rq->errors = IDE_DRV_ERROR_GENERAL;
-		else if (blk_rq_is_passthrough(rq) && rq->errors == 0)
-			rq->errors = -EIO;
+			scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL;
+		else if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
+			scsi_req(rq)->result = -EIO;
 	}
 
 	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
@@ -271,7 +271,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 #ifdef DEBUG
  	printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
-	rq->errors = 0;
+	scsi_req(rq)->result = 0;
 	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
 
  	return ide_stopped;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 248a3e0ceb46..8c0d17297a7a 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -128,7 +128,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
 		scsi_req_init(rq);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
-		err = blk_execute_rq(drive->queue, NULL, rq, 0);
+		blk_execute_rq(drive->queue, NULL, rq, 0);
+		err = scsi_req(rq)->result ? -EIO : 0;
 		blk_put_request(rq);
 
 		return err;
@@ -227,8 +228,8 @@ static int generic_drive_reset(ide_drive_t *drive)
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
-	if (blk_execute_rq(drive->queue, NULL, rq, 1))
-		ret = rq->errors;
+	blk_execute_rq(drive->queue, NULL, rq, 1);
+	ret = scsi_req(rq)->result;
 	blk_put_request(rq);
 	return ret;
 }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 101aed9a61ca..94e3107f59b9 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -37,7 +37,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = &timeout;
-	rc = blk_execute_rq(q, NULL, rq, 1);
+	blk_execute_rq(q, NULL, rq, 1);
+	rc = scsi_req(rq)->result ? -EIO : 0;
 	blk_put_request(rq);
 	if (rc)
 		goto out;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index ec951be4b0c8..0977fc1f40ce 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -27,7 +27,8 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 		mesg.event = PM_EVENT_FREEZE;
 	rqpm.pm_state = mesg.event;
 
-	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+	blk_execute_rq(drive->queue, NULL, rq, 0);
+	ret = scsi_req(rq)->result ? -EIO : 0;
 	blk_put_request(rq);
 
 	if (ret == 0 && ide_port_acpi(hwif)) {
@@ -55,8 +56,8 @@ static int ide_pm_execute_rq(struct request *rq)
 	spin_lock_irq(q->queue_lock);
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
-		rq->errors = -ENXIO;
-		__blk_end_request_all(rq, rq->errors);
+		scsi_req(rq)->result = -ENXIO;
+		__blk_end_request_all(rq, 0);
 		spin_unlock_irq(q->queue_lock);
 		return -ENXIO;
 	}
@@ -66,7 +67,7 @@ static int ide_pm_execute_rq(struct request *rq)
 
 	wait_for_completion_io(&wait);
 
-	return rq->errors ? -EIO : 0;
+	return scsi_req(rq)->result ? -EIO : 0;
 }
 
 int generic_ide_resume(struct device *dev)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d8a552b47718..a0651f948b76 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -366,7 +366,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 			err = pc->error;
 		}
 	}
-	rq->errors = err;
+	scsi_req(rq)->result = err;
 
 	return uptodate;
 }
@@ -879,7 +879,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 		tape->valid = 0;
 
 	ret = size;
-	if (rq->errors == IDE_DRV_ERROR_GENERAL)
+	if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL)
 		ret = -EIO;
 out_put:
 	blk_put_request(rq);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4c0007cb74e3..d71199d23c9e 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -287,7 +287,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd,
 	u8 saved_io_32bit = drive->io_32bit;
 
 	if (cmd->tf_flags & IDE_TFLAG_FS)
-		cmd->rq->errors = 0;
+		scsi_req(cmd->rq)->result = 0;
 
 	if (cmd->tf_flags & IDE_TFLAG_IO_16BIT)
 		drive->io_32bit = 0;
@@ -329,7 +329,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
 	u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER);
 
 	ide_complete_cmd(drive, cmd, stat, err);
-	rq->errors = err;
+	scsi_req(rq)->result = err;
 
 	if (err == 0 && set_xfer) {
 		ide_set_xfer_rate(drive, nsect);
@@ -452,8 +452,8 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	rq->special = cmd;
 	cmd->rq = rq;
 
-	error = blk_execute_rq(drive->queue, NULL, rq, 0);
-
+	blk_execute_rq(drive->queue, NULL, rq, 0);
+	error = scsi_req(rq)->result ? -EIO : 0;
 put_req:
 	blk_put_request(rq);
 	return error;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index efc8ec342351..e73d968023f7 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
  * Avatar AVIU-145A2       0x361f00        ?               clickpad
  * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
  * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
  * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
@@ -1524,6 +1525,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 		},
 	},
 	{
+		/* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"),
+		},
+	},
+	{
 		/* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 312bd6ca9198..09720d950686 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -620,6 +620,13 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "20046"),
 		},
 	},
+	{
+		/* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 275f467956ee..6c2999872090 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -76,6 +76,15 @@ config LEDS_BCM6358
 	  This option enables support for LEDs connected to the BCM6358
 	  LED HW controller accessed via MMIO registers.
 
+config LEDS_CPCAP
+	tristate "LED Support for Motorola CPCAP"
+	depends on LEDS_CLASS
+	depends on MFD_CPCAP
+	depends on OF
+	help
+	  This option enables support for LEDs offered by Motorola's
+	  CPCAP PMIC.
+
 config LEDS_LM3530
 	tristate "LCD Backlight driver for LM3530"
 	depends on LEDS_CLASS
@@ -126,6 +135,14 @@ config LEDS_MIKROTIK_RB532
 	  This option enables support for the so called "User LED" of
 	  Mikrotik's Routerboard 532.
 
+config LEDS_MT6323
+	tristate "LED Support for Mediatek MT6323 PMIC"
+	depends on LEDS_CLASS
+	depends on MFD_MT6397
+	help
+	  This option enables support for on-chip LED drivers found on
+	  Mediatek MT6323 PMIC.
+
 config LEDS_S3C24XX
 	tristate "LED Support for Samsung S3C24XX GPIO LEDs"
 	depends on LEDS_CLASS
@@ -241,7 +258,6 @@ config LEDS_LP3952
 	tristate "LED Support for TI LP3952 2 channel LED driver"
 	depends on LEDS_CLASS
 	depends on I2C
-	depends on ACPI
 	depends on GPIOLIB
 	select REGMAP_I2C
 	help
@@ -463,15 +479,6 @@ config LEDS_ADP5520
 	  To compile this driver as a module, choose M here: the module will
 	  be called leds-adp5520.
 
-config LEDS_DELL_NETBOOKS
-	tristate "External LED on Dell Business Netbooks"
-	depends on LEDS_CLASS
-	depends on X86 && ACPI_WMI
-	depends on DELL_SMBIOS
-	help
-	  This adds support for the Latitude 2100 and similar
-	  notebooks that have an external LED.
-
 config LEDS_MC13783
 	tristate "LED Support for MC13XXX PMIC"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 6b8273736478..45f133962ed8 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_LEDS_AAT1290)		+= leds-aat1290.o
 obj-$(CONFIG_LEDS_BCM6328)		+= leds-bcm6328.o
 obj-$(CONFIG_LEDS_BCM6358)		+= leds-bcm6358.o
 obj-$(CONFIG_LEDS_BD2802)		+= leds-bd2802.o
+obj-$(CONFIG_LEDS_CPCAP)		+= leds-cpcap.o
 obj-$(CONFIG_LEDS_LOCOMO)		+= leds-locomo.o
 obj-$(CONFIG_LEDS_LM3530)		+= leds-lm3530.o
 obj-$(CONFIG_LEDS_LM3533)		+= leds-lm3533.o
@@ -52,7 +53,6 @@ obj-$(CONFIG_LEDS_REGULATOR)		+= leds-regulator.o
 obj-$(CONFIG_LEDS_INTEL_SS4200)		+= leds-ss4200.o
 obj-$(CONFIG_LEDS_LT3593)		+= leds-lt3593.o
 obj-$(CONFIG_LEDS_ADP5520)		+= leds-adp5520.o
-obj-$(CONFIG_LEDS_DELL_NETBOOKS)	+= dell-led.o
 obj-$(CONFIG_LEDS_MC13783)		+= leds-mc13783.o
 obj-$(CONFIG_LEDS_NS2)			+= leds-ns2.o
 obj-$(CONFIG_LEDS_NETXBIG)		+= leds-netxbig.o
@@ -72,6 +72,7 @@ obj-$(CONFIG_LEDS_IS31FL32XX)		+= leds-is31fl32xx.o
 obj-$(CONFIG_LEDS_PM8058)		+= leds-pm8058.o
 obj-$(CONFIG_LEDS_MLXCPLD)		+= leds-mlxcpld.o
 obj-$(CONFIG_LEDS_NIC78BX)		+= leds-nic78bx.o
+obj-$(CONFIG_LEDS_MT6323)		+= leds-mt6323.o
 
 # LED SPI Drivers
 obj-$(CONFIG_LEDS_DAC124S085)		+= leds-dac124s085.o
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index f2b0a80a62b4..b0e2d55acbd6 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -244,11 +244,14 @@ static int led_classdev_next_name(const char *init_name, char *name,
 }
 
 /**
- * led_classdev_register - register a new object of led_classdev class.
- * @parent: The device to register.
+ * of_led_classdev_register - register a new object of led_classdev class.
+ *
+ * @parent: parent of LED device
  * @led_cdev: the led_classdev structure for this device.
+ * @np: DT node describing this LED
  */
-int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
+int of_led_classdev_register(struct device *parent, struct device_node *np,
+			    struct led_classdev *led_cdev)
 {
 	char name[LED_MAX_NAME_SIZE];
 	int ret;
@@ -261,6 +264,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 				led_cdev, led_cdev->groups, "%s", name);
 	if (IS_ERR(led_cdev->dev))
 		return PTR_ERR(led_cdev->dev);
+	led_cdev->dev->of_node = np;
 
 	if (ret)
 		dev_warn(parent, "Led %s renamed to %s due to name collision",
@@ -303,7 +307,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(led_classdev_register);
+EXPORT_SYMBOL_GPL(of_led_classdev_register);
 
 /**
  * led_classdev_unregister - unregisters a object of led_properties class.
@@ -348,12 +352,14 @@ static void devm_led_classdev_release(struct device *dev, void *res)
 }
 
 /**
- * devm_led_classdev_register - resource managed led_classdev_register()
- * @parent: The device to register.
+ * devm_of_led_classdev_register - resource managed led_classdev_register()
+ *
+ * @parent: parent of LED device
  * @led_cdev: the led_classdev structure for this device.
  */
-int devm_led_classdev_register(struct device *parent,
-			       struct led_classdev *led_cdev)
+int devm_of_led_classdev_register(struct device *parent,
+				  struct device_node *np,
+				  struct led_classdev *led_cdev)
 {
 	struct led_classdev **dr;
 	int rc;
@@ -362,7 +368,7 @@ int devm_led_classdev_register(struct device *parent,
 	if (!dr)
 		return -ENOMEM;
 
-	rc = led_classdev_register(parent, led_cdev);
+	rc = of_led_classdev_register(parent, np, led_cdev);
 	if (rc) {
 		devres_free(dr);
 		return rc;
@@ -373,7 +379,7 @@ int devm_led_classdev_register(struct device *parent,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(devm_led_classdev_register);
+EXPORT_SYMBOL_GPL(devm_of_led_classdev_register);
 
 static int devm_led_classdev_match(struct device *dev, void *res, void *data)
 {
diff --git a/drivers/leds/leds-cpcap.c b/drivers/leds/leds-cpcap.c
new file mode 100644
index 000000000000..f0f28c442807
--- /dev/null
+++ b/drivers/leds/leds-cpcap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2017 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/leds.h>
+#include <linux/mfd/motorola-cpcap.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+
+#define CPCAP_LED_NO_CURRENT 0x0001
+
+struct cpcap_led_info {
+	u16 reg;
+	u16 mask;
+	u16 limit;
+	u16 init_mask;
+	u16 init_val;
+};
+
+static const struct cpcap_led_info cpcap_led_red = {
+	.reg	= CPCAP_REG_REDC,
+	.mask	= 0x03FF,
+	.limit	= 31,
+};
+
+static const struct cpcap_led_info cpcap_led_green = {
+	.reg	= CPCAP_REG_GREENC,
+	.mask	= 0x03FF,
+	.limit	= 31,
+};
+
+static const struct cpcap_led_info cpcap_led_blue = {
+	.reg	= CPCAP_REG_BLUEC,
+	.mask	= 0x03FF,
+	.limit	= 31,
+};
+
+/* aux display light */
+static const struct cpcap_led_info cpcap_led_adl = {
+	.reg		= CPCAP_REG_ADLC,
+	.mask		= 0x000F,
+	.limit		= 1,
+	.init_mask	= 0x7FFF,
+	.init_val	= 0x5FF0,
+};
+
+/* camera privacy led */
+static const struct cpcap_led_info cpcap_led_cp = {
+	.reg		= CPCAP_REG_CLEDC,
+	.mask		= 0x0007,
+	.limit		= 1,
+	.init_mask	= 0x03FF,
+	.init_val	= 0x0008,
+};
+
+struct cpcap_led {
+	struct led_classdev led;
+	const struct cpcap_led_info *info;
+	struct device *dev;
+	struct regmap *regmap;
+	struct mutex update_lock;
+	struct regulator *vdd;
+	bool powered;
+
+	u32 current_limit;
+};
+
+static u16 cpcap_led_val(u8 current_limit, u8 duty_cycle)
+{
+	current_limit &= 0x1f; /* 5 bit */
+	duty_cycle &= 0x0f; /* 4 bit */
+
+	return current_limit << 4 | duty_cycle;
+}
+
+static int cpcap_led_set_power(struct cpcap_led *led, bool status)
+{
+	int err;
+
+	if (status == led->powered)
+		return 0;
+
+	if (status)
+		err = regulator_enable(led->vdd);
+	else
+		err = regulator_disable(led->vdd);
+
+	if (err) {
+		dev_err(led->dev, "regulator failure: %d", err);
+		return err;
+	}
+
+	led->powered = status;
+
+	return 0;
+}
+
+static int cpcap_led_set(struct led_classdev *ledc, enum led_brightness value)
+{
+	struct cpcap_led *led = container_of(ledc, struct cpcap_led, led);
+	int brightness;
+	int err;
+
+	mutex_lock(&led->update_lock);
+
+	if (value > LED_OFF) {
+		err = cpcap_led_set_power(led, true);
+		if (err)
+			goto exit;
+	}
+
+	if (value == LED_OFF) {
+		/* Avoid HW issue by turning off current before duty cycle */
+		err = regmap_update_bits(led->regmap,
+			led->info->reg, led->info->mask, CPCAP_LED_NO_CURRENT);
+		if (err) {
+			dev_err(led->dev, "regmap failed: %d", err);
+			goto exit;
+		}
+
+		brightness = cpcap_led_val(value, LED_OFF);
+	} else {
+		brightness = cpcap_led_val(value, LED_ON);
+	}
+
+	err = regmap_update_bits(led->regmap, led->info->reg, led->info->mask,
+		brightness);
+	if (err) {
+		dev_err(led->dev, "regmap failed: %d", err);
+		goto exit;
+	}
+
+	if (value == LED_OFF) {
+		err = cpcap_led_set_power(led, false);
+		if (err)
+			goto exit;
+	}
+
+exit:
+	mutex_unlock(&led->update_lock);
+	return err;
+}
+
+static const struct of_device_id cpcap_led_of_match[] = {
+	{ .compatible = "motorola,cpcap-led-red", .data = &cpcap_led_red },
+	{ .compatible = "motorola,cpcap-led-green", .data = &cpcap_led_green },
+	{ .compatible = "motorola,cpcap-led-blue",  .data = &cpcap_led_blue },
+	{ .compatible = "motorola,cpcap-led-adl", .data = &cpcap_led_adl },
+	{ .compatible = "motorola,cpcap-led-cp", .data = &cpcap_led_cp },
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpcap_led_of_match);
+
+static int cpcap_led_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct cpcap_led *led;
+	int err;
+
+	match = of_match_device(of_match_ptr(cpcap_led_of_match), &pdev->dev);
+	if (!match || !match->data)
+		return -EINVAL;
+
+	led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL);
+	if (!led)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, led);
+	led->info = match->data;
+	led->dev = &pdev->dev;
+
+	if (led->info->reg == 0x0000) {
+		dev_err(led->dev, "Unsupported LED");
+		return -ENODEV;
+	}
+
+	led->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!led->regmap)
+		return -ENODEV;
+
+	led->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	if (IS_ERR(led->vdd)) {
+		err = PTR_ERR(led->vdd);
+		dev_err(led->dev, "Couldn't get regulator: %d", err);
+		return err;
+	}
+
+	err = device_property_read_string(&pdev->dev, "label", &led->led.name);
+	if (err) {
+		dev_err(led->dev, "Couldn't read LED label: %d", err);
+		return err;
+	}
+
+	if (led->info->init_mask) {
+		err = regmap_update_bits(led->regmap, led->info->reg,
+			led->info->init_mask, led->info->init_val);
+		if (err) {
+			dev_err(led->dev, "regmap failed: %d", err);
+			return err;
+		}
+	}
+
+	mutex_init(&led->update_lock);
+
+	led->led.max_brightness = led->info->limit;
+	led->led.brightness_set_blocking = cpcap_led_set;
+	err = devm_led_classdev_register(&pdev->dev, &led->led);
+	if (err) {
+		dev_err(led->dev, "Couldn't register LED: %d", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static struct platform_driver cpcap_led_driver = {
+	.probe = cpcap_led_probe,
+	.driver = {
+		.name = "cpcap-led",
+		.of_match_table = cpcap_led_of_match,
+	},
+};
+module_platform_driver(cpcap_led_driver);
+
+MODULE_DESCRIPTION("CPCAP LED driver");
+MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 066fc7590729..e753ba93ba1e 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -77,7 +77,7 @@ static int gpio_blink_set(struct led_classdev *led_cdev,
 
 static int create_gpio_led(const struct gpio_led *template,
 	struct gpio_led_data *led_dat, struct device *parent,
-	gpio_blink_set_t blink_set)
+	struct device_node *np, gpio_blink_set_t blink_set)
 {
 	int ret, state;
 
@@ -139,7 +139,7 @@ static int create_gpio_led(const struct gpio_led *template,
 	if (ret < 0)
 		return ret;
 
-	return devm_led_classdev_register(parent, &led_dat->cdev);
+	return devm_of_led_classdev_register(parent, np, &led_dat->cdev);
 }
 
 struct gpio_leds_priv {
@@ -208,7 +208,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
 		if (fwnode_property_present(child, "panic-indicator"))
 			led.panic_indicator = 1;
 
-		ret = create_gpio_led(&led, led_dat, dev, NULL);
+		ret = create_gpio_led(&led, led_dat, dev, np, NULL);
 		if (ret < 0) {
 			fwnode_handle_put(child);
 			return ERR_PTR(ret);
@@ -242,9 +242,9 @@ static int gpio_led_probe(struct platform_device *pdev)
 
 		priv->num_leds = pdata->num_leds;
 		for (i = 0; i < priv->num_leds; i++) {
-			ret = create_gpio_led(&pdata->leds[i],
-					      &priv->leds[i],
-					      &pdev->dev, pdata->gpio_blink_set);
+			ret = create_gpio_led(&pdata->leds[i], &priv->leds[i],
+					      &pdev->dev, NULL,
+					      pdata->gpio_blink_set);
 			if (ret < 0)
 				return ret;
 		}
diff --git a/drivers/leds/leds-lp3952.c b/drivers/leds/leds-lp3952.c
index 4847e89883a7..847f7f282126 100644
--- a/drivers/leds/leds-lp3952.c
+++ b/drivers/leds/leds-lp3952.c
@@ -10,7 +10,6 @@
  *
  */
 
-#include <linux/acpi.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/i2c.h>
@@ -103,10 +102,11 @@ static int lp3952_get_label(struct device *dev, const char *label, char *dest)
 	const char *str;
 
 	ret = device_property_read_string(dev, label, &str);
-	if (!ret)
-		strncpy(dest, str, LP3952_LABEL_MAX_LEN);
+	if (ret)
+		return ret;
 
-	return ret;
+	strncpy(dest, str, LP3952_LABEL_MAX_LEN);
+	return 0;
 }
 
 static int lp3952_register_led_classdev(struct lp3952_led_array *priv)
@@ -276,19 +276,9 @@ static const struct i2c_device_id lp3952_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lp3952_id);
 
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id lp3952_acpi_match[] = {
-	{"TXNW3952", 0},
-	{}
-};
-
-MODULE_DEVICE_TABLE(acpi, lp3952_acpi_match);
-#endif
-
 static struct i2c_driver lp3952_i2c_driver = {
 	.driver = {
 			.name = LP3952_NAME,
-			.acpi_match_table = ACPI_PTR(lp3952_acpi_match),
 	},
 	.probe = lp3952_probe,
 	.remove = lp3952_remove,
diff --git a/drivers/leds/leds-mt6323.c b/drivers/leds/leds-mt6323.c
new file mode 100644
index 000000000000..8893c74e9a1f
--- /dev/null
+++ b/drivers/leds/leds-mt6323.c
@@ -0,0 +1,502 @@
+/*
+ * LED driver for Mediatek MT6323 PMIC
+ *
+ * Copyright (C) 2017 Sean Wang <sean.wang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/*
+ * Register field for MT6323_TOP_CKPDN0 to enable
+ * 32K clock common for LED device.
+ */
+#define MT6323_RG_DRV_32K_CK_PDN	BIT(11)
+#define MT6323_RG_DRV_32K_CK_PDN_MASK	BIT(11)
+
+/*
+ * Register field for MT6323_TOP_CKPDN2 to enable
+ * individual clock for LED device.
+ */
+#define MT6323_RG_ISINK_CK_PDN(i)	BIT(i)
+#define MT6323_RG_ISINK_CK_PDN_MASK(i)	BIT(i)
+
+/*
+ * Register field for MT6323_TOP_CKCON1 to select
+ * clock source.
+ */
+#define MT6323_RG_ISINK_CK_SEL_MASK(i)	(BIT(10) << (i))
+
+/*
+ * Register for MT6323_ISINK_CON0 to setup the
+ * duty cycle of the blink.
+ */
+#define MT6323_ISINK_CON0(i)		(MT6323_ISINK0_CON0 + 0x8 * (i))
+#define MT6323_ISINK_DIM_DUTY_MASK	(0x1f << 8)
+#define MT6323_ISINK_DIM_DUTY(i)	(((i) << 8) & \
+					MT6323_ISINK_DIM_DUTY_MASK)
+
+/* Register to setup the period of the blink. */
+#define MT6323_ISINK_CON1(i)		(MT6323_ISINK0_CON1 + 0x8 * (i))
+#define MT6323_ISINK_DIM_FSEL_MASK	(0xffff)
+#define MT6323_ISINK_DIM_FSEL(i)	((i) & MT6323_ISINK_DIM_FSEL_MASK)
+
+/* Register to control the brightness. */
+#define MT6323_ISINK_CON2(i)		(MT6323_ISINK0_CON2 + 0x8 * (i))
+#define MT6323_ISINK_CH_STEP_SHIFT	12
+#define MT6323_ISINK_CH_STEP_MASK	(0x7 << 12)
+#define MT6323_ISINK_CH_STEP(i)		(((i) << 12) & \
+					MT6323_ISINK_CH_STEP_MASK)
+#define MT6323_ISINK_SFSTR0_TC_MASK	(0x3 << 1)
+#define MT6323_ISINK_SFSTR0_TC(i)	(((i) << 1) & \
+					MT6323_ISINK_SFSTR0_TC_MASK)
+#define MT6323_ISINK_SFSTR0_EN_MASK	BIT(0)
+#define MT6323_ISINK_SFSTR0_EN		BIT(0)
+
+/* Register to LED channel enablement. */
+#define MT6323_ISINK_CH_EN_MASK(i)	BIT(i)
+#define MT6323_ISINK_CH_EN(i)		BIT(i)
+
+#define MT6323_MAX_PERIOD		10000
+#define MT6323_MAX_LEDS			4
+#define MT6323_MAX_BRIGHTNESS		6
+#define MT6323_UNIT_DUTY		3125
+#define MT6323_CAL_HW_DUTY(o, p)	DIV_ROUND_CLOSEST((o) * 100000ul,\
+					(p) * MT6323_UNIT_DUTY)
+
+struct mt6323_leds;
+
+/**
+ * struct mt6323_led - state container for the LED device
+ * @id:			the identifier in MT6323 LED device
+ * @parent:		the pointer to MT6323 LED controller
+ * @cdev:		LED class device for this LED device
+ * @current_brightness: current state of the LED device
+ */
+struct mt6323_led {
+	int			id;
+	struct mt6323_leds	*parent;
+	struct led_classdev	cdev;
+	enum led_brightness	current_brightness;
+};
+
+/**
+ * struct mt6323_leds -	state container for holding LED controller
+ *			of the driver
+ * @dev:		the device pointer
+ * @hw:			the underlying hardware providing shared
+ *			bus for the register operations
+ * @lock:		the lock among process context
+ * @led:		the array that contains the state of individual
+ *			LED device
+ */
+struct mt6323_leds {
+	struct device		*dev;
+	struct mt6397_chip	*hw;
+	/* protect among process context */
+	struct mutex		lock;
+	struct mt6323_led	*led[MT6323_MAX_LEDS];
+};
+
+static int mt6323_led_hw_brightness(struct led_classdev *cdev,
+				    enum led_brightness brightness)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	struct regmap *regmap = leds->hw->regmap;
+	u32 con2_mask = 0, con2_val = 0;
+	int ret;
+
+	/*
+	 * Setup current output for the corresponding
+	 * brightness level.
+	 */
+	con2_mask |= MT6323_ISINK_CH_STEP_MASK |
+		     MT6323_ISINK_SFSTR0_TC_MASK |
+		     MT6323_ISINK_SFSTR0_EN_MASK;
+	con2_val |=  MT6323_ISINK_CH_STEP(brightness - 1) |
+		     MT6323_ISINK_SFSTR0_TC(2) |
+		     MT6323_ISINK_SFSTR0_EN;
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_CON2(led->id),
+				 con2_mask, con2_val);
+	return ret;
+}
+
+static int mt6323_led_hw_off(struct led_classdev *cdev)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	struct regmap *regmap = leds->hw->regmap;
+	unsigned int status;
+	int ret;
+
+	status = MT6323_ISINK_CH_EN(led->id);
+	ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL,
+				 MT6323_ISINK_CH_EN_MASK(led->id), ~status);
+	if (ret < 0)
+		return ret;
+
+	usleep_range(100, 300);
+	ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2,
+				 MT6323_RG_ISINK_CK_PDN_MASK(led->id),
+				 MT6323_RG_ISINK_CK_PDN(led->id));
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static enum led_brightness
+mt6323_get_led_hw_brightness(struct led_classdev *cdev)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	struct regmap *regmap = leds->hw->regmap;
+	unsigned int status;
+	int ret;
+
+	ret = regmap_read(regmap, MT6323_TOP_CKPDN2, &status);
+	if (ret < 0)
+		return ret;
+
+	if (status & MT6323_RG_ISINK_CK_PDN_MASK(led->id))
+		return 0;
+
+	ret = regmap_read(regmap, MT6323_ISINK_EN_CTRL, &status);
+	if (ret < 0)
+		return ret;
+
+	if (!(status & MT6323_ISINK_CH_EN(led->id)))
+		return 0;
+
+	ret = regmap_read(regmap, MT6323_ISINK_CON2(led->id), &status);
+	if (ret < 0)
+		return ret;
+
+	return  ((status & MT6323_ISINK_CH_STEP_MASK)
+		  >> MT6323_ISINK_CH_STEP_SHIFT) + 1;
+}
+
+static int mt6323_led_hw_on(struct led_classdev *cdev,
+			    enum led_brightness brightness)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	struct regmap *regmap = leds->hw->regmap;
+	unsigned int status;
+	int ret;
+
+	/*
+	 * Setup required clock source, enable the corresponding
+	 * clock and channel and let work with continuous blink as
+	 * the default.
+	 */
+	ret = regmap_update_bits(regmap, MT6323_TOP_CKCON1,
+				 MT6323_RG_ISINK_CK_SEL_MASK(led->id), 0);
+	if (ret < 0)
+		return ret;
+
+	status = MT6323_RG_ISINK_CK_PDN(led->id);
+	ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2,
+				 MT6323_RG_ISINK_CK_PDN_MASK(led->id),
+				 ~status);
+	if (ret < 0)
+		return ret;
+
+	usleep_range(100, 300);
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL,
+				 MT6323_ISINK_CH_EN_MASK(led->id),
+				 MT6323_ISINK_CH_EN(led->id));
+	if (ret < 0)
+		return ret;
+
+	ret = mt6323_led_hw_brightness(cdev, brightness);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id),
+				 MT6323_ISINK_DIM_DUTY_MASK,
+				 MT6323_ISINK_DIM_DUTY(31));
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id),
+				 MT6323_ISINK_DIM_FSEL_MASK,
+				 MT6323_ISINK_DIM_FSEL(1000));
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int mt6323_led_set_blink(struct led_classdev *cdev,
+				unsigned long *delay_on,
+				unsigned long *delay_off)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	struct regmap *regmap = leds->hw->regmap;
+	unsigned long period;
+	u8 duty_hw;
+	int ret;
+
+	/*
+	 * Units are in ms, if over the hardware able
+	 * to support, fallback into software blink
+	 */
+	period = *delay_on + *delay_off;
+
+	if (period > MT6323_MAX_PERIOD)
+		return -EINVAL;
+
+	/*
+	 * LED subsystem requires a default user
+	 * friendly blink pattern for the LED so using
+	 * 1Hz duty cycle 50% here if without specific
+	 * value delay_on and delay off being assigned.
+	 */
+	if (!*delay_on && !*delay_off) {
+		*delay_on = 500;
+		*delay_off = 500;
+	}
+
+	/*
+	 * Calculate duty_hw based on the percentage of period during
+	 * which the led is ON.
+	 */
+	duty_hw = MT6323_CAL_HW_DUTY(*delay_on, period);
+
+	/* hardware doesn't support zero duty cycle. */
+	if (!duty_hw)
+		return -EINVAL;
+
+	mutex_lock(&leds->lock);
+	/*
+	 * Set max_brightness as the software blink behavior
+	 * when no blink brightness.
+	 */
+	if (!led->current_brightness) {
+		ret = mt6323_led_hw_on(cdev, cdev->max_brightness);
+		if (ret < 0)
+			goto out;
+		led->current_brightness = cdev->max_brightness;
+	}
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id),
+				 MT6323_ISINK_DIM_DUTY_MASK,
+				 MT6323_ISINK_DIM_DUTY(duty_hw - 1));
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id),
+				 MT6323_ISINK_DIM_FSEL_MASK,
+				 MT6323_ISINK_DIM_FSEL(period - 1));
+out:
+	mutex_unlock(&leds->lock);
+
+	return ret;
+}
+
+static int mt6323_led_set_brightness(struct led_classdev *cdev,
+				     enum led_brightness brightness)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	struct mt6323_leds *leds = led->parent;
+	int ret;
+
+	mutex_lock(&leds->lock);
+
+	if (!led->current_brightness && brightness) {
+		ret = mt6323_led_hw_on(cdev, brightness);
+		if (ret < 0)
+			goto out;
+	} else if (brightness) {
+		ret = mt6323_led_hw_brightness(cdev, brightness);
+		if (ret < 0)
+			goto out;
+	} else {
+		ret = mt6323_led_hw_off(cdev);
+		if (ret < 0)
+			goto out;
+	}
+
+	led->current_brightness = brightness;
+out:
+	mutex_unlock(&leds->lock);
+
+	return ret;
+}
+
+static int mt6323_led_set_dt_default(struct led_classdev *cdev,
+				     struct device_node *np)
+{
+	struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev);
+	const char *state;
+	int ret = 0;
+
+	led->cdev.name = of_get_property(np, "label", NULL) ? : np->name;
+	led->cdev.default_trigger = of_get_property(np,
+						    "linux,default-trigger",
+						    NULL);
+
+	state = of_get_property(np, "default-state", NULL);
+	if (state) {
+		if (!strcmp(state, "keep")) {
+			ret = mt6323_get_led_hw_brightness(cdev);
+			if (ret < 0)
+				return ret;
+			led->current_brightness = ret;
+			ret = 0;
+		} else if (!strcmp(state, "on")) {
+			ret =
+			mt6323_led_set_brightness(cdev, cdev->max_brightness);
+		} else  {
+			ret = mt6323_led_set_brightness(cdev, LED_OFF);
+		}
+	}
+
+	return ret;
+}
+
+static int mt6323_led_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *child;
+	struct mt6397_chip *hw = dev_get_drvdata(pdev->dev.parent);
+	struct mt6323_leds *leds;
+	struct mt6323_led *led;
+	int ret;
+	unsigned int status;
+	u32 reg;
+
+	leds = devm_kzalloc(dev, sizeof(*leds), GFP_KERNEL);
+	if (!leds)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, leds);
+	leds->dev = dev;
+
+	/*
+	 * leds->hw points to the underlying bus for the register
+	 * controlled.
+	 */
+	leds->hw = hw;
+	mutex_init(&leds->lock);
+
+	status = MT6323_RG_DRV_32K_CK_PDN;
+	ret = regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0,
+				 MT6323_RG_DRV_32K_CK_PDN_MASK, ~status);
+	if (ret < 0) {
+		dev_err(leds->dev,
+			"Failed to update MT6323_TOP_CKPDN0 Register\n");
+		return ret;
+	}
+
+	for_each_available_child_of_node(np, child) {
+		ret = of_property_read_u32(child, "reg", &reg);
+		if (ret) {
+			dev_err(dev, "Failed to read led 'reg' property\n");
+			goto put_child_node;
+		}
+
+		if (reg >= MT6323_MAX_LEDS || leds->led[reg]) {
+			dev_err(dev, "Invalid led reg %u\n", reg);
+			ret = -EINVAL;
+			goto put_child_node;
+		}
+
+		led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
+		if (!led) {
+			ret = -ENOMEM;
+			goto put_child_node;
+		}
+
+		leds->led[reg] = led;
+		leds->led[reg]->id = reg;
+		leds->led[reg]->cdev.max_brightness = MT6323_MAX_BRIGHTNESS;
+		leds->led[reg]->cdev.brightness_set_blocking =
+					mt6323_led_set_brightness;
+		leds->led[reg]->cdev.blink_set = mt6323_led_set_blink;
+		leds->led[reg]->cdev.brightness_get =
+					mt6323_get_led_hw_brightness;
+		leds->led[reg]->parent = leds;
+
+		ret = mt6323_led_set_dt_default(&leds->led[reg]->cdev, child);
+		if (ret < 0) {
+			dev_err(leds->dev,
+				"Failed to LED set default from devicetree\n");
+			goto put_child_node;
+		}
+
+		ret = devm_led_classdev_register(dev, &leds->led[reg]->cdev);
+		if (ret) {
+			dev_err(&pdev->dev, "Failed to register LED: %d\n",
+				ret);
+			goto put_child_node;
+		}
+		leds->led[reg]->cdev.dev->of_node = child;
+	}
+
+	return 0;
+
+put_child_node:
+	of_node_put(child);
+	return ret;
+}
+
+static int mt6323_led_remove(struct platform_device *pdev)
+{
+	struct mt6323_leds *leds = platform_get_drvdata(pdev);
+	int i;
+
+	/* Turn the LEDs off on driver removal. */
+	for (i = 0 ; leds->led[i] ; i++)
+		mt6323_led_hw_off(&leds->led[i]->cdev);
+
+	regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0,
+			   MT6323_RG_DRV_32K_CK_PDN_MASK,
+			   MT6323_RG_DRV_32K_CK_PDN);
+
+	mutex_destroy(&leds->lock);
+
+	return 0;
+}
+
+static const struct of_device_id mt6323_led_dt_match[] = {
+	{ .compatible = "mediatek,mt6323-led" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mt6323_led_dt_match);
+
+static struct platform_driver mt6323_led_driver = {
+	.probe		= mt6323_led_probe,
+	.remove		= mt6323_led_remove,
+	.driver		= {
+		.name	= "mt6323-led",
+		.of_match_table = mt6323_led_dt_match,
+	},
+};
+
+module_platform_driver(mt6323_led_driver);
+
+MODULE_DESCRIPTION("LED driver for Mediatek MT6323 PMIC");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
index 06e63106ae1e..7fea18b0c15d 100644
--- a/drivers/leds/leds-pca9532.c
+++ b/drivers/leds/leds-pca9532.c
@@ -254,6 +254,21 @@ static void pca9532_input_work(struct work_struct *work)
 	mutex_unlock(&data->update_lock);
 }
 
+static enum pca9532_state pca9532_getled(struct pca9532_led *led)
+{
+	struct i2c_client *client = led->client;
+	struct pca9532_data *data = i2c_get_clientdata(client);
+	u8 maxleds = data->chip_info->num_leds;
+	char reg;
+	enum pca9532_state ret;
+
+	mutex_lock(&data->update_lock);
+	reg = i2c_smbus_read_byte_data(client, LED_REG(maxleds, led->id));
+	ret = reg >> LED_NUM(led->id)/2;
+	mutex_unlock(&data->update_lock);
+	return ret;
+}
+
 #ifdef CONFIG_LEDS_PCA9532_GPIO
 static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset)
 {
@@ -366,7 +381,10 @@ static int pca9532_configure(struct i2c_client *client,
 			gpios++;
 			break;
 		case PCA9532_TYPE_LED:
-			led->state = pled->state;
+			if (pled->state == PCA9532_KEEP)
+				led->state = pca9532_getled(led);
+			else
+				led->state = pled->state;
 			led->name = pled->name;
 			led->ldev.name = led->name;
 			led->ldev.default_trigger = pled->default_trigger;
@@ -456,6 +474,7 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np)
 	const struct of_device_id *match;
 	int devid, maxleds;
 	int i = 0;
+	const char *state;
 
 	match = of_match_device(of_pca9532_leds_match, dev);
 	if (!match)
@@ -475,6 +494,12 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np)
 		of_property_read_u32(child, "type", &pdata->leds[i].type);
 		of_property_read_string(child, "linux,default-trigger",
 					&pdata->leds[i].default_trigger);
+		if (!of_property_read_string(child, "default-state", &state)) {
+			if (!strcmp(state, "on"))
+				pdata->leds[i].state = PCA9532_ON;
+			else if (!strcmp(state, "keep"))
+				pdata->leds[i].state = PCA9532_KEEP;
+		}
 		if (++i >= maxleds) {
 			of_node_put(child);
 			break;
diff --git a/drivers/leds/trigger/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c
index a41896468cb3..66a626091936 100644
--- a/drivers/leds/trigger/ledtrig-cpu.c
+++ b/drivers/leds/trigger/ledtrig-cpu.c
@@ -31,12 +31,16 @@
 #define MAX_NAME_LEN	8
 
 struct led_trigger_cpu {
+	bool is_active;
 	char name[MAX_NAME_LEN];
 	struct led_trigger *_trig;
 };
 
 static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig);
 
+static struct led_trigger *trig_cpu_all;
+static atomic_t num_active_cpus = ATOMIC_INIT(0);
+
 /**
  * ledtrig_cpu - emit a CPU event as a trigger
  * @evt: CPU event to be emitted
@@ -47,26 +51,46 @@ static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig);
 void ledtrig_cpu(enum cpu_led_event ledevt)
 {
 	struct led_trigger_cpu *trig = this_cpu_ptr(&cpu_trig);
+	bool is_active = trig->is_active;
 
 	/* Locate the correct CPU LED */
 	switch (ledevt) {
 	case CPU_LED_IDLE_END:
 	case CPU_LED_START:
 		/* Will turn the LED on, max brightness */
-		led_trigger_event(trig->_trig, LED_FULL);
+		is_active = true;
 		break;
 
 	case CPU_LED_IDLE_START:
 	case CPU_LED_STOP:
 	case CPU_LED_HALTED:
 		/* Will turn the LED off */
-		led_trigger_event(trig->_trig, LED_OFF);
+		is_active = false;
 		break;
 
 	default:
 		/* Will leave the LED as it is */
 		break;
 	}
+
+	if (is_active != trig->is_active) {
+		unsigned int active_cpus;
+		unsigned int total_cpus;
+
+		/* Update trigger state */
+		trig->is_active = is_active;
+		atomic_add(is_active ? 1 : -1, &num_active_cpus);
+		active_cpus = atomic_read(&num_active_cpus);
+		total_cpus = num_present_cpus();
+
+		led_trigger_event(trig->_trig,
+			is_active ? LED_FULL : LED_OFF);
+
+
+		led_trigger_event(trig_cpu_all,
+			DIV_ROUND_UP(LED_FULL * active_cpus, total_cpus));
+
+	}
 }
 EXPORT_SYMBOL(ledtrig_cpu);
 
@@ -113,6 +137,11 @@ static int __init ledtrig_cpu_init(void)
 	BUILD_BUG_ON(CONFIG_NR_CPUS > 9999);
 
 	/*
+	 * Registering a trigger for all CPUs.
+	 */
+	led_trigger_register_simple("cpu", &trig_cpu_all);
+
+	/*
 	 * Registering CPU led trigger for each CPU core here
 	 * ignores CPU hotplug, but after this CPU hotplug works
 	 * fine with this trigger.
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 052714106b7b..ead61a93cb4e 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -33,4 +33,13 @@ config NVM_RRPC
 	host. The target is implemented using a linear mapping table and
 	cost-based garbage collection. It is optimized for 4K IO sizes.
 
+config NVM_PBLK
+	tristate "Physical Block Device Open-Channel SSD target"
+	---help---
+	Allows an open-channel SSD to be exposed as a block device to the
+	host. The target assumes the device exposes raw flash and must be
+	explicitly managed by the host.
+
+	Please note the disk format is considered EXPERIMENTAL for now.
+
 endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index b2a39e2d2895..82d1a117fb27 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -4,3 +4,8 @@
 
 obj-$(CONFIG_NVM)		:= core.o
 obj-$(CONFIG_NVM_RRPC)		+= rrpc.o
+obj-$(CONFIG_NVM_PBLK)		+= pblk.o
+pblk-y				:= pblk-init.o pblk-core.o pblk-rb.o \
+				   pblk-write.o pblk-cache.o pblk-read.o \
+				   pblk-gc.o pblk-recovery.o pblk-map.o \
+				   pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 5262ba66a7a7..54a06c3a2b8c 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -89,7 +89,7 @@ static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
 		WARN_ON(!test_and_clear_bit(i, dev->lun_map));
 }
 
-static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
+static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
 	struct nvm_dev_map *dev_map = tgt_dev->map;
@@ -100,11 +100,14 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
 		int *lun_offs = ch_map->lun_offs;
 		int ch = i + ch_map->ch_off;
 
-		for (j = 0; j < ch_map->nr_luns; j++) {
-			int lun = j + lun_offs[j];
-			int lunid = (ch * dev->geo.luns_per_chnl) + lun;
+		if (clear) {
+			for (j = 0; j < ch_map->nr_luns; j++) {
+				int lun = j + lun_offs[j];
+				int lunid = (ch * dev->geo.luns_per_chnl) + lun;
 
-			WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+				WARN_ON(!test_and_clear_bit(lunid,
+							dev->lun_map));
+			}
 		}
 
 		kfree(ch_map->lun_offs);
@@ -232,6 +235,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	struct nvm_target *t;
 	struct nvm_tgt_dev *tgt_dev;
 	void *targetdata;
+	int ret;
 
 	tt = nvm_find_target_type(create->tgttype, 1);
 	if (!tt) {
@@ -252,34 +256,43 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 		return -ENOMEM;
 
 	t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
-	if (!t)
+	if (!t) {
+		ret = -ENOMEM;
 		goto err_reserve;
+	}
 
 	tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end);
 	if (!tgt_dev) {
 		pr_err("nvm: could not create target device\n");
+		ret = -ENOMEM;
 		goto err_t;
 	}
 
-	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
-	if (!tqueue)
+	tdisk = alloc_disk(0);
+	if (!tdisk) {
+		ret = -ENOMEM;
 		goto err_dev;
-	blk_queue_make_request(tqueue, tt->make_rq);
+	}
 
-	tdisk = alloc_disk(0);
-	if (!tdisk)
-		goto err_queue;
+	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+	if (!tqueue) {
+		ret = -ENOMEM;
+		goto err_disk;
+	}
+	blk_queue_make_request(tqueue, tt->make_rq);
 
-	sprintf(tdisk->disk_name, "%s", create->tgtname);
+	strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
 	tdisk->flags = GENHD_FL_EXT_DEVT;
 	tdisk->major = 0;
 	tdisk->first_minor = 0;
 	tdisk->fops = &nvm_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(tgt_dev, tdisk);
-	if (IS_ERR(targetdata))
+	targetdata = tt->init(tgt_dev, tdisk, create->flags);
+	if (IS_ERR(targetdata)) {
+		ret = PTR_ERR(targetdata);
 		goto err_init;
+	}
 
 	tdisk->private_data = targetdata;
 	tqueue->queuedata = targetdata;
@@ -289,8 +302,10 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	set_capacity(tdisk, tt->capacity(targetdata));
 	add_disk(tdisk);
 
-	if (tt->sysfs_init && tt->sysfs_init(tdisk))
+	if (tt->sysfs_init && tt->sysfs_init(tdisk)) {
+		ret = -ENOMEM;
 		goto err_sysfs;
+	}
 
 	t->type = tt;
 	t->disk = tdisk;
@@ -305,16 +320,17 @@ err_sysfs:
 	if (tt->exit)
 		tt->exit(targetdata);
 err_init:
-	put_disk(tdisk);
-err_queue:
 	blk_cleanup_queue(tqueue);
+	tdisk->queue = NULL;
+err_disk:
+	put_disk(tdisk);
 err_dev:
-	nvm_remove_tgt_dev(tgt_dev);
+	nvm_remove_tgt_dev(tgt_dev, 0);
 err_t:
 	kfree(t);
 err_reserve:
 	nvm_release_luns_err(dev, s->lun_begin, s->lun_end);
-	return -ENOMEM;
+	return ret;
 }
 
 static void __nvm_remove_target(struct nvm_target *t)
@@ -332,7 +348,7 @@ static void __nvm_remove_target(struct nvm_target *t)
 	if (tt->exit)
 		tt->exit(tdisk->private_data);
 
-	nvm_remove_tgt_dev(t->dev);
+	nvm_remove_tgt_dev(t->dev, 1);
 	put_disk(tdisk);
 
 	list_del(&t->list);
@@ -411,6 +427,18 @@ err_rmap:
 	return -ENOMEM;
 }
 
+static void nvm_unregister_map(struct nvm_dev *dev)
+{
+	struct nvm_dev_map *rmap = dev->rmap;
+	int i;
+
+	for (i = 0; i < dev->geo.nr_chnls; i++)
+		kfree(rmap->chnls[i].lun_offs);
+
+	kfree(rmap->chnls);
+	kfree(rmap);
+}
+
 static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
 {
 	struct nvm_dev_map *dev_map = tgt_dev->map;
@@ -486,7 +514,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
 		int *lun_roffs;
 		struct ppa_addr gaddr;
 		u64 pba = le64_to_cpu(entries[i]);
-		int off;
 		u64 diff;
 
 		if (!pba)
@@ -496,8 +523,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
 		ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
 		lun_roffs = ch_rmap->lun_offs;
 
-		off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
-
 		diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
 				(lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
 
@@ -590,11 +615,11 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
-	nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
+	nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
 	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
 
 	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
-	nvm_free_rqd_ppalist(dev, &rqd);
+	nvm_free_rqd_ppalist(tgt_dev, &rqd);
 	if (ret) {
 		pr_err("nvm: failed bb mark\n");
 		return -EINVAL;
@@ -626,34 +651,45 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
-int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags)
+static void nvm_end_io_sync(struct nvm_rq *rqd)
 {
-	struct nvm_dev *dev = tgt_dev->parent;
-	struct nvm_rq rqd;
-	int ret;
+	struct completion *waiting = rqd->private;
 
-	if (!dev->ops->erase_block)
-		return 0;
+	complete(waiting);
+}
 
-	nvm_map_to_dev(tgt_dev, ppas);
+int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
+								int nr_ppas)
+{
+	struct nvm_geo *geo = &tgt_dev->geo;
+	struct nvm_rq rqd;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
-	ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1);
+	rqd.opcode = NVM_OP_ERASE;
+	rqd.end_io = nvm_end_io_sync;
+	rqd.private = &wait;
+	rqd.flags = geo->plane_mode >> 1;
+
+	ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
 	if (ret)
 		return ret;
 
-	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
-
-	rqd.flags = flags;
-
-	ret = dev->ops->erase_block(dev, &rqd);
+	ret = nvm_submit_io(tgt_dev, &rqd);
+	if (ret) {
+		pr_err("rrpr: erase I/O submission failed: %d\n", ret);
+		goto free_ppa_list;
+	}
+	wait_for_completion_io(&wait);
 
-	nvm_free_rqd_ppalist(dev, &rqd);
+free_ppa_list:
+	nvm_free_rqd_ppalist(tgt_dev, &rqd);
 
 	return ret;
 }
-EXPORT_SYMBOL(nvm_erase_blk);
+EXPORT_SYMBOL(nvm_erase_sync);
 
 int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb,
 		    nvm_l2p_update_fn *update_l2p, void *priv)
@@ -732,10 +768,11 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin)
 }
 EXPORT_SYMBOL(nvm_put_area);
 
-int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
+int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
 			const struct ppa_addr *ppas, int nr_ppas, int vblk)
 {
-	struct nvm_geo *geo = &dev->geo;
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct nvm_geo *geo = &tgt_dev->geo;
 	int i, plane_cnt, pl_idx;
 	struct ppa_addr ppa;
 
@@ -773,12 +810,12 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
 }
 EXPORT_SYMBOL(nvm_set_rqd_ppalist);
 
-void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
+void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	if (!rqd->ppa_list)
 		return;
 
-	nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list);
+	nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
 }
 EXPORT_SYMBOL(nvm_free_rqd_ppalist);
 
@@ -972,7 +1009,7 @@ err_fmtype:
 	return ret;
 }
 
-void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct nvm_dev *dev)
 {
 	if (!dev)
 		return;
@@ -980,7 +1017,7 @@ void nvm_free(struct nvm_dev *dev)
 	if (dev->dma_pool)
 		dev->ops->destroy_dma_pool(dev->dma_pool);
 
-	kfree(dev->rmap);
+	nvm_unregister_map(dev);
 	kfree(dev->lptbl);
 	kfree(dev->lun_map);
 	kfree(dev);
@@ -1174,13 +1211,13 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
 	list_for_each_entry(dev, &nvm_devices, devices) {
 		struct nvm_ioctl_device_info *info = &devices->info[i];
 
-		sprintf(info->devname, "%s", dev->name);
+		strlcpy(info->devname, dev->name, sizeof(info->devname));
 
 		/* kept for compatibility */
 		info->bmversion[0] = 1;
 		info->bmversion[1] = 0;
 		info->bmversion[2] = 0;
-		sprintf(info->bmname, "%s", "gennvm");
+		strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
 		i++;
 
 		if (i > 31) {
@@ -1217,8 +1254,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
 	create.tgtname[DISK_NAME_LEN - 1] = '\0';
 
 	if (create.flags != 0) {
-		pr_err("nvm: no flags supported\n");
-		return -EINVAL;
+		__u32 flags = create.flags;
+
+		/* Check for valid flags */
+		if (flags & NVM_TARGET_FACTORY)
+			flags &= ~NVM_TARGET_FACTORY;
+
+		if (flags) {
+			pr_err("nvm: flag not supported\n");
+			return -EINVAL;
+		}
 	}
 
 	return __nvm_configure_create(&create);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
new file mode 100644
index 000000000000..59bcea88db84
--- /dev/null
+++ b/drivers/lightnvm/pblk-cache.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-cache.c - pblk's write cache
+ */
+
+#include "pblk.h"
+
+int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
+{
+	struct pblk_w_ctx w_ctx;
+	sector_t lba = pblk_get_lba(bio);
+	unsigned int bpos, pos;
+	int nr_entries = pblk_get_secs(bio);
+	int i, ret;
+
+	/* Update the write buffer head (mem) with the entries that we can
+	 * write. The write in itself cannot fail, so there is no need to
+	 * rollback from here on.
+	 */
+retry:
+	ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
+	if (ret == NVM_IO_REQUEUE) {
+		io_schedule();
+		goto retry;
+	}
+
+	if (unlikely(!bio_has_data(bio)))
+		goto out;
+
+	w_ctx.flags = flags;
+	pblk_ppa_set_empty(&w_ctx.ppa);
+
+	for (i = 0; i < nr_entries; i++) {
+		void *data = bio_data(bio);
+
+		w_ctx.lba = lba + i;
+
+		pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
+		pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
+
+		bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(nr_entries, &pblk->inflight_writes);
+	atomic_long_add(nr_entries, &pblk->req_writes);
+#endif
+
+out:
+	pblk_write_should_kick(pblk);
+	return ret;
+}
+
+/*
+ * On GC the incoming lbas are not necessarily sequential. Also, some of the
+ * lbas might not be valid entries, which are marked as empty by the GC thread
+ */
+int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
+			   unsigned int nr_entries, unsigned int nr_rec_entries,
+			   struct pblk_line *gc_line, unsigned long flags)
+{
+	struct pblk_w_ctx w_ctx;
+	unsigned int bpos, pos;
+	int i, valid_entries;
+
+	/* Update the write buffer head (mem) with the entries that we can
+	 * write. The write in itself cannot fail, so there is no need to
+	 * rollback from here on.
+	 */
+retry:
+	if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) {
+		io_schedule();
+		goto retry;
+	}
+
+	w_ctx.flags = flags;
+	pblk_ppa_set_empty(&w_ctx.ppa);
+
+	for (i = 0, valid_entries = 0; i < nr_entries; i++) {
+		if (lba_list[i] == ADDR_EMPTY)
+			continue;
+
+		w_ctx.lba = lba_list[i];
+
+		pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
+		pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos);
+
+		data += PBLK_EXPOSED_PAGE_SIZE;
+		valid_entries++;
+	}
+
+	WARN_ONCE(nr_rec_entries != valid_entries,
+					"pblk: inconsistent GC write\n");
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(valid_entries, &pblk->inflight_writes);
+	atomic_long_add(valid_entries, &pblk->recov_gc_writes);
+#endif
+
+	pblk_write_should_kick(pblk);
+	return NVM_IO_OK;
+}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
new file mode 100644
index 000000000000..5e44768ccffa
--- /dev/null
+++ b/drivers/lightnvm/pblk-core.c
@@ -0,0 +1,1667 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-core.c - pblk's core functionality
+ *
+ */
+
+#include "pblk.h"
+#include <linux/time.h>
+
+static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
+			 struct ppa_addr *ppa)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int pos = pblk_dev_ppa_to_pos(geo, *ppa);
+
+	pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
+	atomic_long_inc(&pblk->erase_failed);
+
+	atomic_dec(&line->blk_in_line);
+	if (test_and_set_bit(pos, line->blk_bitmap))
+		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
+							line->id, pos);
+
+	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+}
+
+static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_line *line;
+
+	line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
+	atomic_dec(&line->left_seblks);
+
+	if (rqd->error) {
+		struct ppa_addr *ppa;
+
+		ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
+		if (!ppa)
+			return;
+
+		*ppa = rqd->ppa_addr;
+		pblk_mark_bb(pblk, line, ppa);
+	}
+}
+
+/* Erase completion assumes that only one block is erased at the time */
+static void pblk_end_io_erase(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+
+	up(&pblk->erase_sem);
+	__pblk_end_io_erase(pblk, rqd);
+	mempool_free(rqd, pblk->r_rq_pool);
+}
+
+static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+				  u64 paddr)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list = NULL;
+
+	/* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
+	 * table is modified with reclaimed sectors, a check is done to endure
+	 * that newer updates are not overwritten.
+	 */
+	spin_lock(&line->lock);
+	if (line->state == PBLK_LINESTATE_GC ||
+					line->state == PBLK_LINESTATE_FREE) {
+		spin_unlock(&line->lock);
+		return;
+	}
+
+	if (test_and_set_bit(paddr, line->invalid_bitmap)) {
+		WARN_ONCE(1, "pblk: double invalidate\n");
+		spin_unlock(&line->lock);
+		return;
+	}
+	line->vsc--;
+
+	if (line->state == PBLK_LINESTATE_CLOSED)
+		move_list = pblk_line_gc_list(pblk, line);
+	spin_unlock(&line->lock);
+
+	if (move_list) {
+		spin_lock(&l_mg->gc_lock);
+		spin_lock(&line->lock);
+		/* Prevent moving a line that has just been chosen for GC */
+		if (line->state == PBLK_LINESTATE_GC ||
+					line->state == PBLK_LINESTATE_FREE) {
+			spin_unlock(&line->lock);
+			spin_unlock(&l_mg->gc_lock);
+			return;
+		}
+		spin_unlock(&line->lock);
+
+		list_move_tail(&line->list, move_list);
+		spin_unlock(&l_mg->gc_lock);
+	}
+}
+
+void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
+{
+	struct pblk_line *line;
+	u64 paddr;
+	int line_id;
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a device address */
+	BUG_ON(pblk_addr_in_cache(ppa));
+	BUG_ON(pblk_ppa_empty(ppa));
+#endif
+
+	line_id = pblk_tgt_ppa_to_line(ppa);
+	line = &pblk->lines[line_id];
+	paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
+
+	__pblk_map_invalidate(pblk, line, paddr);
+}
+
+void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
+			     u64 paddr)
+{
+	__pblk_map_invalidate(pblk, line, paddr);
+
+	pblk_rb_sync_init(&pblk->rwb, NULL);
+	line->left_ssecs--;
+	if (!line->left_ssecs)
+		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
+	pblk_rb_sync_end(&pblk->rwb, NULL);
+}
+
+static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
+				  unsigned int nr_secs)
+{
+	sector_t lba;
+
+	spin_lock(&pblk->trans_lock);
+	for (lba = slba; lba < slba + nr_secs; lba++) {
+		struct ppa_addr ppa;
+
+		ppa = pblk_trans_map_get(pblk, lba);
+
+		if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
+			pblk_map_invalidate(pblk, ppa);
+
+		pblk_ppa_set_empty(&ppa);
+		pblk_trans_map_set(pblk, lba, ppa);
+	}
+	spin_unlock(&pblk->trans_lock);
+}
+
+struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
+{
+	mempool_t *pool;
+	struct nvm_rq *rqd;
+	int rq_size;
+
+	if (rw == WRITE) {
+		pool = pblk->w_rq_pool;
+		rq_size = pblk_w_rq_size;
+	} else {
+		pool = pblk->r_rq_pool;
+		rq_size = pblk_r_rq_size;
+	}
+
+	rqd = mempool_alloc(pool, GFP_KERNEL);
+	memset(rqd, 0, rq_size);
+
+	return rqd;
+}
+
+void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
+{
+	mempool_t *pool;
+
+	if (rw == WRITE)
+		pool = pblk->w_rq_pool;
+	else
+		pool = pblk->r_rq_pool;
+
+	mempool_free(rqd, pool);
+}
+
+void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
+			 int nr_pages)
+{
+	struct bio_vec bv;
+	int i;
+
+	WARN_ON(off + nr_pages != bio->bi_vcnt);
+
+	bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
+	for (i = off; i < nr_pages + off; i++) {
+		bv = bio->bi_io_vec[i];
+		mempool_free(bv.bv_page, pblk->page_pool);
+	}
+}
+
+int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
+		       int nr_pages)
+{
+	struct request_queue *q = pblk->dev->q;
+	struct page *page;
+	int i, ret;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = mempool_alloc(pblk->page_pool, flags);
+		if (!page)
+			goto err;
+
+		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
+		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
+			pr_err("pblk: could not add page to bio\n");
+			mempool_free(page, pblk->page_pool);
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	pblk_bio_free_pages(pblk, bio, 0, i - 1);
+	return -1;
+}
+
+static void pblk_write_kick(struct pblk *pblk)
+{
+	wake_up_process(pblk->writer_ts);
+	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
+}
+
+void pblk_write_timer_fn(unsigned long data)
+{
+	struct pblk *pblk = (struct pblk *)data;
+
+	/* kick the write thread every tick to flush outstanding data */
+	pblk_write_kick(pblk);
+}
+
+void pblk_write_should_kick(struct pblk *pblk)
+{
+	unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
+
+	if (secs_avail >= pblk->min_write_pgs)
+		pblk_write_kick(pblk);
+}
+
+void pblk_end_bio_sync(struct bio *bio)
+{
+	struct completion *waiting = bio->bi_private;
+
+	complete(waiting);
+}
+
+void pblk_end_io_sync(struct nvm_rq *rqd)
+{
+	struct completion *waiting = rqd->private;
+
+	complete(waiting);
+}
+
+void pblk_flush_writer(struct pblk *pblk)
+{
+	struct bio *bio;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	bio = bio_alloc(GFP_KERNEL, 1);
+	if (!bio)
+		return;
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
+	bio->bi_private = &wait;
+	bio->bi_end_io = pblk_end_bio_sync;
+
+	ret = pblk_write_to_cache(pblk, bio, 0);
+	if (ret == NVM_IO_OK) {
+		if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+			pr_err("pblk: flush cache timed out\n");
+		}
+	} else if (ret != NVM_IO_DONE) {
+		pr_err("pblk: tear down bio failed\n");
+	}
+
+	if (bio->bi_error)
+		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+
+	bio_put(bio);
+}
+
+struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list = NULL;
+
+	if (!line->vsc) {
+		if (line->gc_group != PBLK_LINEGC_FULL) {
+			line->gc_group = PBLK_LINEGC_FULL;
+			move_list = &l_mg->gc_full_list;
+		}
+	} else if (line->vsc < lm->mid_thrs) {
+		if (line->gc_group != PBLK_LINEGC_HIGH) {
+			line->gc_group = PBLK_LINEGC_HIGH;
+			move_list = &l_mg->gc_high_list;
+		}
+	} else if (line->vsc < lm->high_thrs) {
+		if (line->gc_group != PBLK_LINEGC_MID) {
+			line->gc_group = PBLK_LINEGC_MID;
+			move_list = &l_mg->gc_mid_list;
+		}
+	} else if (line->vsc < line->sec_in_line) {
+		if (line->gc_group != PBLK_LINEGC_LOW) {
+			line->gc_group = PBLK_LINEGC_LOW;
+			move_list = &l_mg->gc_low_list;
+		}
+	} else if (line->vsc == line->sec_in_line) {
+		if (line->gc_group != PBLK_LINEGC_EMPTY) {
+			line->gc_group = PBLK_LINEGC_EMPTY;
+			move_list = &l_mg->gc_empty_list;
+		}
+	} else {
+		line->state = PBLK_LINESTATE_CORRUPT;
+		line->gc_group = PBLK_LINEGC_NONE;
+		move_list =  &l_mg->corrupt_list;
+		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
+						line->id, line->vsc,
+						line->sec_in_line,
+						lm->high_thrs, lm->mid_thrs);
+	}
+
+	return move_list;
+}
+
+void pblk_discard(struct pblk *pblk, struct bio *bio)
+{
+	sector_t slba = pblk_get_lba(bio);
+	sector_t nr_secs = pblk_get_secs(bio);
+
+	pblk_invalidate_range(pblk, slba, nr_secs);
+}
+
+struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
+{
+	struct ppa_addr ppa;
+
+	spin_lock(&pblk->trans_lock);
+	ppa = pblk_trans_map_get(pblk, lba);
+	spin_unlock(&pblk->trans_lock);
+
+	return ppa;
+}
+
+void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	atomic_long_inc(&pblk->write_failed);
+#ifdef CONFIG_NVM_DEBUG
+	pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+}
+
+void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	/* Empty page read is not necessarily an error (e.g., L2P recovery) */
+	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
+		atomic_long_inc(&pblk->read_empty);
+		return;
+	}
+
+	switch (rqd->error) {
+	case NVM_RSP_WARN_HIGHECC:
+		atomic_long_inc(&pblk->read_high_ecc);
+		break;
+	case NVM_RSP_ERR_FAILECC:
+	case NVM_RSP_ERR_FAILCRC:
+		atomic_long_inc(&pblk->read_failed);
+		break;
+	default:
+		pr_err("pblk: unknown read error:%d\n", rqd->error);
+	}
+#ifdef CONFIG_NVM_DEBUG
+	pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+}
+
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+
+#ifdef CONFIG_NVM_DEBUG
+	struct ppa_addr *ppa_list;
+
+	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+	if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	if (rqd->opcode == NVM_OP_PWRITE) {
+		struct pblk_line *line;
+		struct ppa_addr ppa;
+		int i;
+
+		for (i = 0; i < rqd->nr_ppas; i++) {
+			ppa = ppa_list[i];
+			line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
+
+			spin_lock(&line->lock);
+			if (line->state != PBLK_LINESTATE_OPEN) {
+				pr_err("pblk: bad ppa: line:%d,state:%d\n",
+							line->id, line->state);
+				WARN_ON(1);
+				spin_unlock(&line->lock);
+				return -EINVAL;
+			}
+			spin_unlock(&line->lock);
+		}
+	}
+#endif
+	return nvm_submit_io(dev, rqd);
+}
+
+struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
+			      unsigned int nr_secs, unsigned int len,
+			      gfp_t gfp_mask)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	void *kaddr = data;
+	struct page *page;
+	struct bio *bio;
+	int i, ret;
+
+	if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+		return bio_map_kern(dev->q, kaddr, len, gfp_mask);
+
+	bio = bio_kmalloc(gfp_mask, nr_secs);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_secs; i++) {
+		page = vmalloc_to_page(kaddr);
+		if (!page) {
+			pr_err("pblk: could not map vmalloc bio\n");
+			bio_put(bio);
+			bio = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+
+		ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
+		if (ret != PAGE_SIZE) {
+			pr_err("pblk: could not add page to bio\n");
+			bio_put(bio);
+			bio = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+
+		kaddr += PAGE_SIZE;
+	}
+out:
+	return bio;
+}
+
+int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
+		   unsigned long secs_to_flush)
+{
+	int max = pblk->max_write_pgs;
+	int min = pblk->min_write_pgs;
+	int secs_to_sync = 0;
+
+	if (secs_avail >= max)
+		secs_to_sync = max;
+	else if (secs_avail >= min)
+		secs_to_sync = min * (secs_avail / min);
+	else if (secs_to_flush)
+		secs_to_sync = min;
+
+	return secs_to_sync;
+}
+
+static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
+			     int nr_secs)
+{
+	u64 addr;
+	int i;
+
+	/* logic error: ppa out-of-bounds. Prevent generating bad address */
+	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
+		WARN(1, "pblk: page allocation out of bounds\n");
+		nr_secs = pblk->lm.sec_per_line - line->cur_sec;
+	}
+
+	line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
+					pblk->lm.sec_per_line, line->cur_sec);
+	for (i = 0; i < nr_secs; i++, line->cur_sec++)
+		WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
+
+	return addr;
+}
+
+u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+	u64 addr;
+
+	/* Lock needed in case a write fails and a recovery needs to remap
+	 * failed write buffer entries
+	 */
+	spin_lock(&line->lock);
+	addr = __pblk_alloc_page(pblk, line, nr_secs);
+	line->left_msecs -= nr_secs;
+	WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
+	spin_unlock(&line->lock);
+
+	return addr;
+}
+
+/*
+ * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
+ * taking the per LUN semaphore.
+ */
+static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
+				     u64 paddr, int dir)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct bio *bio;
+	struct nvm_rq rqd;
+	struct ppa_addr *ppa_list;
+	dma_addr_t dma_ppa_list;
+	void *emeta = line->emeta;
+	int min = pblk->min_write_pgs;
+	int left_ppas = lm->emeta_sec;
+	int id = line->id;
+	int rq_ppas, rq_len;
+	int cmd_op, bio_op;
+	int flags;
+	int i, j;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	if (dir == WRITE) {
+		bio_op = REQ_OP_WRITE;
+		cmd_op = NVM_OP_PWRITE;
+		flags = pblk_set_progr_mode(pblk, WRITE);
+	} else if (dir == READ) {
+		bio_op = REQ_OP_READ;
+		cmd_op = NVM_OP_PREAD;
+		flags = pblk_set_read_mode(pblk);
+	} else
+		return -EINVAL;
+
+	ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
+	if (!ppa_list)
+		return -ENOMEM;
+
+next_rq:
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto free_rqd_dma;
+	}
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, bio_op, 0);
+
+	rqd.bio = bio;
+	rqd.opcode = cmd_op;
+	rqd.flags = flags;
+	rqd.nr_ppas = rq_ppas;
+	rqd.ppa_list = ppa_list;
+	rqd.dma_ppa_list = dma_ppa_list;
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	if (dir == WRITE) {
+		for (i = 0; i < rqd.nr_ppas; ) {
+			spin_lock(&line->lock);
+			paddr = __pblk_alloc_page(pblk, line, min);
+			spin_unlock(&line->lock);
+			for (j = 0; j < min; j++, i++, paddr++)
+				rqd.ppa_list[i] =
+					addr_to_gen_ppa(pblk, paddr, id);
+		}
+	} else {
+		for (i = 0; i < rqd.nr_ppas; ) {
+			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
+			int pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+			while (test_bit(pos, line->blk_bitmap)) {
+				paddr += min;
+				if (pblk_boundary_paddr_checks(pblk, paddr)) {
+					pr_err("pblk: corrupt emeta line:%d\n",
+								line->id);
+					bio_put(bio);
+					ret = -EINTR;
+					goto free_rqd_dma;
+				}
+
+				ppa = addr_to_gen_ppa(pblk, paddr, id);
+				pos = pblk_dev_ppa_to_pos(geo, ppa);
+			}
+
+			if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
+				pr_err("pblk: corrupt emeta line:%d\n",
+								line->id);
+				bio_put(bio);
+				ret = -EINTR;
+				goto free_rqd_dma;
+			}
+
+			for (j = 0; j < min; j++, i++, paddr++)
+				rqd.ppa_list[i] =
+					addr_to_gen_ppa(pblk, paddr, line->id);
+		}
+	}
+
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+		bio_put(bio);
+		goto free_rqd_dma;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: emeta I/O timed out\n");
+	}
+	reinit_completion(&wait);
+
+	bio_put(bio);
+
+	if (rqd.error) {
+		if (dir == WRITE)
+			pblk_log_write_err(pblk, &rqd);
+		else
+			pblk_log_read_err(pblk, &rqd);
+	}
+
+	emeta += rq_len;
+	left_ppas -= rq_ppas;
+	if (left_ppas)
+		goto next_rq;
+free_rqd_dma:
+	nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+	return ret;
+}
+
+u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int bit;
+
+	/* This usually only happens on bad lines */
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (bit >= lm->blk_per_line)
+		return -1;
+
+	return bit * geo->sec_per_pl;
+}
+
+static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
+				     u64 paddr, int dir)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct bio *bio;
+	struct nvm_rq rqd;
+	__le64 *lba_list = NULL;
+	int i, ret;
+	int cmd_op, bio_op;
+	int flags;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	if (dir == WRITE) {
+		bio_op = REQ_OP_WRITE;
+		cmd_op = NVM_OP_PWRITE;
+		flags = pblk_set_progr_mode(pblk, WRITE);
+		lba_list = pblk_line_emeta_to_lbas(line->emeta);
+	} else if (dir == READ) {
+		bio_op = REQ_OP_READ;
+		cmd_op = NVM_OP_PREAD;
+		flags = pblk_set_read_mode(pblk);
+	} else
+		return -EINVAL;
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_ppa_list);
+	if (!rqd.ppa_list)
+		return -ENOMEM;
+
+	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto free_ppa_list;
+	}
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, bio_op, 0);
+
+	rqd.bio = bio;
+	rqd.opcode = cmd_op;
+	rqd.flags = flags;
+	rqd.nr_ppas = lm->smeta_sec;
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+		if (dir == WRITE)
+			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+	}
+
+	/*
+	 * This I/O is sent by the write thread when a line is replace. Since
+	 * the write thread is the only one sending write and erase commands,
+	 * there is no need to take the LUN semaphore.
+	 */
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		pr_err("pblk: smeta I/O submission failed: %d\n", ret);
+		bio_put(bio);
+		goto free_ppa_list;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: smeta I/O timed out\n");
+	}
+
+	if (rqd.error) {
+		if (dir == WRITE)
+			pblk_log_write_err(pblk, &rqd);
+		else
+			pblk_log_read_err(pblk, &rqd);
+	}
+
+free_ppa_list:
+	nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+
+	return ret;
+}
+
+int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
+{
+	u64 bpaddr = pblk_line_smeta_start(pblk, line);
+
+	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
+}
+
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+{
+	return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+}
+
+static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			    struct ppa_addr ppa)
+{
+	rqd->opcode = NVM_OP_ERASE;
+	rqd->ppa_addr = ppa;
+	rqd->nr_ppas = 1;
+	rqd->flags = pblk_set_progr_mode(pblk, ERASE);
+	rqd->bio = NULL;
+}
+
+static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
+{
+	struct nvm_rq rqd;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	pblk_setup_e_rq(pblk, &rqd, ppa);
+
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	/* The write thread schedules erases so that it minimizes disturbances
+	 * with writes. Thus, there is no need to take the LUN semaphore.
+	 */
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		struct nvm_tgt_dev *dev = pblk->dev;
+		struct nvm_geo *geo = &dev->geo;
+
+		pr_err("pblk: could not sync erase line:%d,blk:%d\n",
+					pblk_dev_ppa_to_line(ppa),
+					pblk_dev_ppa_to_pos(geo, ppa));
+
+		rqd.error = ret;
+		goto out;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: sync erase timed out\n");
+	}
+
+out:
+	rqd.private = pblk;
+	__pblk_end_io_erase(pblk, &rqd);
+
+	return 0;
+}
+
+int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct ppa_addr ppa;
+	int bit = -1;
+
+	/* Erase only good blocks, one at a time */
+	do {
+		spin_lock(&line->lock);
+		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
+								bit + 1);
+		if (bit >= lm->blk_per_line) {
+			spin_unlock(&line->lock);
+			break;
+		}
+
+		ppa = pblk->luns[bit].bppa; /* set ch and lun */
+		ppa.g.blk = line->id;
+
+		atomic_dec(&line->left_eblks);
+		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
+		spin_unlock(&line->lock);
+
+		if (pblk_blk_erase_sync(pblk, ppa)) {
+			pr_err("pblk: failed to erase line %d\n", line->id);
+			return -ENOMEM;
+		}
+	} while (1);
+
+	return 0;
+}
+
+/* For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ */
+static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+				  struct pblk_line *cur)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct line_smeta *smeta = line->smeta;
+	struct line_emeta *emeta = line->emeta;
+	int nr_blk_line;
+
+	/* After erasing the line, new bad blocks might appear and we risk
+	 * having an invalid line
+	 */
+	nr_blk_line = lm->blk_per_line -
+			bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+	if (nr_blk_line < lm->min_blk_line) {
+		spin_lock(&l_mg->free_lock);
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+		spin_unlock(&l_mg->free_lock);
+
+		pr_debug("pblk: line %d is bad\n", line->id);
+
+		return 0;
+	}
+
+	/* Run-time metadata */
+	line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+
+	/* Mark LUNs allocated in this line (all for now) */
+	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
+
+	smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
+	memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
+	smeta->header.id = cpu_to_le32(line->id);
+	smeta->header.type = cpu_to_le16(line->type);
+	smeta->header.version = cpu_to_le16(1);
+
+	/* Start metadata */
+	smeta->seq_nr = cpu_to_le64(line->seq_nr);
+	smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+
+	/* Fill metadata among lines */
+	if (cur) {
+		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
+		smeta->prev_id = cpu_to_le32(cur->id);
+		cur->emeta->next_id = cpu_to_le32(line->id);
+	} else {
+		smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	}
+
+	/* All smeta must be set at this point */
+	smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
+	smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+
+	/* End metadata */
+	memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
+	emeta->seq_nr = cpu_to_le64(line->seq_nr);
+	emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
+	emeta->nr_valid_lbas = cpu_to_le64(0);
+	emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	emeta->crc = cpu_to_le32(0);
+	emeta->prev_id = smeta->prev_id;
+
+	return 1;
+}
+
+/* For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ */
+static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
+			     int init)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int nr_bb = 0;
+	u64 off;
+	int bit = -1;
+
+	line->sec_in_line = lm->sec_per_line;
+
+	/* Capture bad block information on line mapping bitmaps */
+	while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
+					bit + 1)) < lm->blk_per_line) {
+		off = bit * geo->sec_per_pl;
+		bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
+							lm->sec_per_line);
+		bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
+							lm->sec_per_line);
+		line->sec_in_line -= geo->sec_per_blk;
+		if (bit >= lm->emeta_bb)
+			nr_bb++;
+	}
+
+	/* Mark smeta metadata sectors as bad sectors */
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	off = bit * geo->sec_per_pl;
+retry_smeta:
+	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
+	line->sec_in_line -= lm->smeta_sec;
+	line->smeta_ssec = off;
+	line->cur_sec = off + lm->smeta_sec;
+
+	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
+		pr_debug("pblk: line smeta I/O failed. Retry\n");
+		off += geo->sec_per_pl;
+		goto retry_smeta;
+	}
+
+	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
+
+	/* Mark emeta metadata sectors as bad sectors. We need to consider bad
+	 * blocks to make sure that there are enough sectors to store emeta
+	 */
+	bit = lm->sec_per_line;
+	off = lm->sec_per_line - lm->emeta_sec;
+	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+	while (nr_bb) {
+		off -= geo->sec_per_pl;
+		if (!test_bit(off, line->invalid_bitmap)) {
+			bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
+			nr_bb--;
+		}
+	}
+
+	line->sec_in_line -= lm->emeta_sec;
+	line->emeta_ssec = off;
+	line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+
+	if (lm->sec_per_line - line->sec_in_line !=
+		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+		pr_err("pblk: unexpected line %d is bad\n", line->id);
+
+		return 0;
+	}
+
+	return 1;
+}
+
+static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = atomic_read(&line->blk_in_line);
+
+	line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+	if (!line->map_bitmap)
+		return -ENOMEM;
+	memset(line->map_bitmap, 0, lm->sec_bitmap_len);
+
+	/* invalid_bitmap is special since it is used when line is closed. No
+	 * need to zeroized; it will be initialized using bb info form
+	 * map_bitmap
+	 */
+	line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+	if (!line->invalid_bitmap) {
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
+		return -ENOMEM;
+	}
+
+	spin_lock(&line->lock);
+	if (line->state != PBLK_LINESTATE_FREE) {
+		spin_unlock(&line->lock);
+		WARN(1, "pblk: corrupted line state\n");
+		return -EINTR;
+	}
+	line->state = PBLK_LINESTATE_OPEN;
+
+	atomic_set(&line->left_eblks, blk_in_line);
+	atomic_set(&line->left_seblks, blk_in_line);
+	spin_unlock(&line->lock);
+
+	/* Bad blocks do not need to be erased */
+	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
+
+	kref_init(&line->ref);
+
+	return 0;
+}
+
+int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int ret;
+
+	spin_lock(&l_mg->free_lock);
+	l_mg->data_line = line;
+	list_del(&line->list);
+
+	ret = pblk_line_prepare(pblk, line);
+	if (ret) {
+		list_add(&line->list, &l_mg->free_list);
+		spin_unlock(&l_mg->free_lock);
+		return ret;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_dec(&pblk->rl, line);
+
+	if (!pblk_line_init_bb(pblk, line, 0)) {
+		list_add(&line->list, &l_mg->free_list);
+		return -EINTR;
+	}
+
+	return 0;
+}
+
+void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
+{
+	mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	line->map_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+struct pblk_line *pblk_line_get(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *line = NULL;
+	int bit;
+
+	lockdep_assert_held(&l_mg->free_lock);
+
+retry_get:
+	if (list_empty(&l_mg->free_list)) {
+		pr_err("pblk: no free lines\n");
+		goto out;
+	}
+
+	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
+	list_del(&line->list);
+	l_mg->nr_free_lines--;
+
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (unlikely(bit >= lm->blk_per_line)) {
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+
+		pr_debug("pblk: line %d is bad\n", line->id);
+		goto retry_get;
+	}
+
+	if (pblk_line_prepare(pblk, line)) {
+		pr_err("pblk: failed to prepare line %d\n", line->id);
+		list_add(&line->list, &l_mg->free_list);
+		return NULL;
+	}
+
+out:
+	return line;
+}
+
+static struct pblk_line *pblk_line_retry(struct pblk *pblk,
+					 struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *retry_line;
+
+	spin_lock(&l_mg->free_lock);
+	retry_line = pblk_line_get(pblk);
+	if (!retry_line) {
+		l_mg->data_line = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	retry_line->smeta = line->smeta;
+	retry_line->emeta = line->emeta;
+	retry_line->meta_line = line->meta_line;
+
+	pblk_line_free(pblk, line);
+	l_mg->data_line = retry_line;
+	spin_unlock(&l_mg->free_lock);
+
+	if (pblk_line_erase(pblk, retry_line)) {
+		spin_lock(&l_mg->free_lock);
+		l_mg->data_line = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+
+	return retry_line;
+}
+
+struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line;
+	int meta_line;
+	int is_next = 0;
+
+	spin_lock(&l_mg->free_lock);
+	line = pblk_line_get(pblk);
+	if (!line) {
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	line->seq_nr = l_mg->d_seq_nr++;
+	line->type = PBLK_LINETYPE_DATA;
+	l_mg->data_line = line;
+
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	line->smeta = l_mg->sline_meta[meta_line].meta;
+	line->emeta = l_mg->eline_meta[meta_line].meta;
+	line->meta_line = meta_line;
+
+	/* Allocate next line for preparation */
+	l_mg->data_next = pblk_line_get(pblk);
+	if (l_mg->data_next) {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_dec(&pblk->rl, line);
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+	if (pblk_line_erase(pblk, line))
+		return NULL;
+
+retry_setup:
+	if (!pblk_line_set_metadata(pblk, line, NULL)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	if (!pblk_line_init_bb(pblk, line, 1)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	return line;
+}
+
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *cur, *new;
+	unsigned int left_seblks;
+	int meta_line;
+	int is_next = 0;
+
+	cur = l_mg->data_line;
+	new = l_mg->data_next;
+	if (!new)
+		return NULL;
+	l_mg->data_line = new;
+
+retry_line:
+	left_seblks = atomic_read(&new->left_seblks);
+	if (left_seblks) {
+		/* If line is not fully erased, erase it */
+		if (atomic_read(&new->left_eblks)) {
+			if (pblk_line_erase(pblk, new))
+				return NULL;
+		} else {
+			io_schedule();
+		}
+		goto retry_line;
+	}
+
+	spin_lock(&l_mg->free_lock);
+	/* Allocate next line for preparation */
+	l_mg->data_next = pblk_line_get(pblk);
+	if (l_mg->data_next) {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+
+retry_meta:
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	if (meta_line == PBLK_DATA_LINES) {
+		spin_unlock(&l_mg->free_lock);
+		io_schedule();
+		spin_lock(&l_mg->free_lock);
+		goto retry_meta;
+	}
+
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	new->smeta = l_mg->sline_meta[meta_line].meta;
+	new->emeta = l_mg->eline_meta[meta_line].meta;
+	new->meta_line = meta_line;
+
+	memset(new->smeta, 0, lm->smeta_len);
+	memset(new->emeta, 0, lm->emeta_len);
+	spin_unlock(&l_mg->free_lock);
+
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+retry_setup:
+	if (!pblk_line_set_metadata(pblk, new, cur)) {
+		new = pblk_line_retry(pblk, new);
+		if (!new)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	if (!pblk_line_init_bb(pblk, new, 1)) {
+		new = pblk_line_retry(pblk, new);
+		if (!new)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	return new;
+}
+
+void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
+{
+	if (line->map_bitmap)
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	if (line->invalid_bitmap)
+		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+
+	line->map_bitmap = NULL;
+	line->invalid_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+void pblk_line_put(struct kref *ref)
+{
+	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
+	struct pblk *pblk = line->pblk;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_GC);
+	line->state = PBLK_LINESTATE_FREE;
+	line->gc_group = PBLK_LINEGC_NONE;
+	pblk_line_free(pblk, line);
+	spin_unlock(&line->lock);
+
+	spin_lock(&l_mg->free_lock);
+	list_add_tail(&line->list, &l_mg->free_list);
+	l_mg->nr_free_lines++;
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_inc(&pblk->rl, line);
+}
+
+int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
+{
+	struct nvm_rq *rqd;
+	int err;
+
+	rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
+	memset(rqd, 0, pblk_r_rq_size);
+
+	pblk_setup_e_rq(pblk, rqd, ppa);
+
+	rqd->end_io = pblk_end_io_erase;
+	rqd->private = pblk;
+
+	/* The write thread schedules erases so that it minimizes disturbances
+	 * with writes. Thus, there is no need to take the LUN semaphore.
+	 */
+	err = pblk_submit_io(pblk, rqd);
+	if (err) {
+		struct nvm_tgt_dev *dev = pblk->dev;
+		struct nvm_geo *geo = &dev->geo;
+
+		pr_err("pblk: could not async erase line:%d,blk:%d\n",
+					pblk_dev_ppa_to_line(ppa),
+					pblk_dev_ppa_to_pos(geo, ppa));
+	}
+
+	return err;
+}
+
+struct pblk_line *pblk_line_get_data(struct pblk *pblk)
+{
+	return pblk->l_mg.data_line;
+}
+
+struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+{
+	return pblk->l_mg.data_next;
+}
+
+int pblk_line_is_full(struct pblk_line *line)
+{
+	return (line->left_msecs == 0);
+}
+
+void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list;
+
+	line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
+
+	if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
+		pr_err("pblk: line %d close I/O failed\n", line->id);
+
+	WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+				"pblk: corrupt closed line %d\n", line->id);
+
+	spin_lock(&l_mg->free_lock);
+	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
+	spin_unlock(&l_mg->free_lock);
+
+	spin_lock(&l_mg->gc_lock);
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_OPEN);
+	line->state = PBLK_LINESTATE_CLOSED;
+	move_list = pblk_line_gc_list(pblk, line);
+
+	list_add_tail(&line->list, move_list);
+
+	mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	line->map_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+
+	spin_unlock(&line->lock);
+	spin_unlock(&l_mg->gc_lock);
+}
+
+void pblk_line_close_ws(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct pblk_line *line = line_ws->line;
+
+	pblk_line_close(pblk, line);
+	mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+void pblk_line_mark_bb(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct ppa_addr *ppa = line_ws->priv;
+	int ret;
+
+	ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+	if (ret) {
+		struct pblk_line *line;
+		int pos;
+
+		line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
+		pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
+
+		pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
+				line->id, pos);
+	}
+
+	kfree(ppa);
+	mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
+		      void (*work)(struct work_struct *))
+{
+	struct pblk_line_ws *line_ws;
+
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
+	if (!line_ws)
+		return;
+
+	line_ws->pblk = pblk;
+	line_ws->line = line;
+	line_ws->priv = priv;
+
+	INIT_WORK(&line_ws->ws, work);
+	queue_work(pblk->kw_wq, &line_ws->ws);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+	int ret;
+
+	/*
+	 * Only send one inflight I/O per LUN. Since we map at a page
+	 * granurality, all ppas in the I/O will map to the same LUN
+	 */
+#ifdef CONFIG_NVM_DEBUG
+	int i;
+
+	for (i = 1; i < nr_ppas; i++)
+		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+				ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+	/* If the LUN has been locked for this same request, do no attempt to
+	 * lock it again
+	 */
+	if (test_and_set_bit(lun_id, lun_bitmap))
+		return;
+
+	rlun = &pblk->luns[lun_id];
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	if (ret) {
+		switch (ret) {
+		case -ETIME:
+			pr_err("pblk: lun semaphore timed out\n");
+			break;
+		case -EINTR:
+			pr_err("pblk: lun semaphore timed out\n");
+			break;
+		}
+	}
+}
+
+void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int nr_luns = geo->nr_luns;
+	int bit = -1;
+
+	while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
+		rlun = &pblk->luns[bit];
+		up(&rlun->wr_sem);
+	}
+
+	kfree(lun_bitmap);
+}
+
+void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+	struct ppa_addr l2p_ppa;
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+
+	if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
+		pblk_map_invalidate(pblk, l2p_ppa);
+
+	pblk_trans_map_set(pblk, lba, ppa);
+	spin_unlock(&pblk->trans_lock);
+}
+
+void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a cache address */
+	BUG_ON(!pblk_addr_in_cache(ppa));
+	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+	pblk_update_map(pblk, lba, ppa);
+}
+
+int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+		       struct pblk_line *gc_line)
+{
+	struct ppa_addr l2p_ppa;
+	int ret = 1;
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a cache address */
+	BUG_ON(!pblk_addr_in_cache(ppa));
+	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return 0;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+
+	/* Prevent updated entries to be overwritten by GC */
+	if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
+				pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
+		ret = 0;
+		goto out;
+	}
+
+	pblk_trans_map_set(pblk, lba, ppa);
+out:
+	spin_unlock(&pblk->trans_lock);
+	return ret;
+}
+
+void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+			 struct ppa_addr entry_line)
+{
+	struct ppa_addr l2p_line;
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a device address */
+	BUG_ON(pblk_addr_in_cache(ppa));
+#endif
+	/* Invalidate and discard padded entries */
+	if (lba == ADDR_EMPTY) {
+#ifdef CONFIG_NVM_DEBUG
+		atomic_long_inc(&pblk->padded_wb);
+#endif
+		pblk_map_invalidate(pblk, ppa);
+		return;
+	}
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_line = pblk_trans_map_get(pblk, lba);
+
+	/* Do not update L2P if the cacheline has been updated. In this case,
+	 * the mapped ppa must be invalidated
+	 */
+	if (l2p_line.ppa != entry_line.ppa) {
+		if (!pblk_ppa_empty(ppa))
+			pblk_map_invalidate(pblk, ppa);
+		goto out;
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
+#endif
+
+	pblk_trans_map_set(pblk, lba, ppa);
+out:
+	spin_unlock(&pblk->trans_lock);
+}
+
+void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+			 sector_t blba, int nr_secs)
+{
+	int i;
+
+	spin_lock(&pblk->trans_lock);
+	for (i = 0; i < nr_secs; i++)
+		ppas[i] = pblk_trans_map_get(pblk, blba + i);
+	spin_unlock(&pblk->trans_lock);
+}
+
+void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
+			  u64 *lba_list, int nr_secs)
+{
+	sector_t lba;
+	int i;
+
+	spin_lock(&pblk->trans_lock);
+	for (i = 0; i < nr_secs; i++) {
+		lba = lba_list[i];
+		if (lba == ADDR_EMPTY) {
+			ppas[i].ppa = ADDR_EMPTY;
+		} else {
+			/* logic error: lba out-of-bounds. Ignore update */
+			if (!(lba < pblk->rl.nr_secs)) {
+				WARN(1, "pblk: corrupted L2P map request\n");
+				continue;
+			}
+			ppas[i] = pblk_trans_map_get(pblk, lba);
+		}
+	}
+	spin_unlock(&pblk->trans_lock);
+}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
new file mode 100644
index 000000000000..eaf479c6b63c
--- /dev/null
+++ b/drivers/lightnvm/pblk-gc.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-gc.c - pblk's garbage collector
+ */
+
+#include "pblk.h"
+#include <linux/delay.h>
+
+static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
+{
+	kfree(gc_rq->data);
+	kfree(gc_rq->lba_list);
+	kfree(gc_rq);
+}
+
+static int pblk_gc_write(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_gc_rq *gc_rq, *tgc_rq;
+	LIST_HEAD(w_list);
+
+	spin_lock(&gc->w_lock);
+	if (list_empty(&gc->w_list)) {
+		spin_unlock(&gc->w_lock);
+		return 1;
+	}
+
+	list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
+		list_move_tail(&gc_rq->list, &w_list);
+		gc->w_entries--;
+	}
+	spin_unlock(&gc->w_lock);
+
+	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
+		pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
+				gc_rq->nr_secs, gc_rq->secs_to_gc,
+				gc_rq->line, PBLK_IOTYPE_GC);
+
+		kref_put(&gc_rq->line->ref, pblk_line_put);
+
+		list_del(&gc_rq->list);
+		pblk_gc_free_gc_rq(gc_rq);
+	}
+
+	return 0;
+}
+
+static void pblk_gc_writer_kick(struct pblk_gc *gc)
+{
+	wake_up_process(gc->gc_writer_ts);
+}
+
+/*
+ * Responsible for managing all memory related to a gc request. Also in case of
+ * failure
+ */
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
+				   u64 *lba_list, unsigned int nr_secs)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_gc_rq *gc_rq;
+	void *data;
+	unsigned int secs_to_gc;
+	int ret = NVM_IO_OK;
+
+	data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+	if (!data) {
+		ret = NVM_IO_ERR;
+		goto free_lba_list;
+	}
+
+	/* Read from GC victim block */
+	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+							&secs_to_gc, line)) {
+		ret = NVM_IO_ERR;
+		goto free_data;
+	}
+
+	if (!secs_to_gc)
+		goto free_data;
+
+	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+	if (!gc_rq) {
+		ret = NVM_IO_ERR;
+		goto free_data;
+	}
+
+	gc_rq->line = line;
+	gc_rq->data = data;
+	gc_rq->lba_list = lba_list;
+	gc_rq->nr_secs = nr_secs;
+	gc_rq->secs_to_gc = secs_to_gc;
+
+	kref_get(&line->ref);
+
+retry:
+	spin_lock(&gc->w_lock);
+	if (gc->w_entries > 256) {
+		spin_unlock(&gc->w_lock);
+		usleep_range(256, 1024);
+		goto retry;
+	}
+	gc->w_entries++;
+	list_add_tail(&gc_rq->list, &gc->w_list);
+	spin_unlock(&gc->w_lock);
+
+	pblk_gc_writer_kick(&pblk->gc);
+
+	return NVM_IO_OK;
+
+free_data:
+	kfree(data);
+free_lba_list:
+	kfree(lba_list);
+
+	return ret;
+}
+
+static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list;
+
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_GC);
+	line->state = PBLK_LINESTATE_CLOSED;
+	move_list = pblk_line_gc_list(pblk, line);
+	spin_unlock(&line->lock);
+
+	if (move_list) {
+		spin_lock(&l_mg->gc_lock);
+		list_add_tail(&line->list, move_list);
+		spin_unlock(&l_mg->gc_lock);
+	}
+}
+
+static void pblk_gc_line_ws(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line = line_ws->line;
+	struct pblk_line_meta *lm = &pblk->lm;
+	__le64 *lba_list = line_ws->priv;
+	u64 *gc_list;
+	int sec_left;
+	int nr_ppas, bit;
+	int put_line = 1;
+
+	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+
+	spin_lock(&line->lock);
+	sec_left = line->vsc;
+	if (!sec_left) {
+		/* Lines are erased before being used (l_mg->data_/log_next) */
+		spin_unlock(&line->lock);
+		goto out;
+	}
+	spin_unlock(&line->lock);
+
+	if (sec_left < 0) {
+		pr_err("pblk: corrupted GC line (%d)\n", line->id);
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	bit = -1;
+next_rq:
+	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
+	if (!gc_list) {
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	nr_ppas = 0;
+	do {
+		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
+								bit + 1);
+		if (bit > line->emeta_ssec)
+			break;
+
+		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
+	} while (nr_ppas < pblk->max_write_pgs);
+
+	if (unlikely(!nr_ppas)) {
+		kfree(gc_list);
+		goto out;
+	}
+
+	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
+		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
+						line->id, line->vsc,
+						nr_ppas, nr_ppas);
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	sec_left -= nr_ppas;
+	if (sec_left > 0)
+		goto next_rq;
+
+out:
+	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+	mempool_free(line_ws, pblk->line_ws_pool);
+	atomic_dec(&pblk->gc.inflight_gc);
+	if (put_line)
+		kref_put(&line->ref, pblk_line_put);
+}
+
+static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_ws *line_ws;
+	__le64 *lba_list;
+	int ret;
+
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
+								GFP_KERNEL);
+	if (!line->emeta) {
+		pr_err("pblk: cannot use GC emeta\n");
+		goto fail_free_ws;
+	}
+
+	ret = pblk_line_read_emeta(pblk, line);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+		goto fail_free_emeta;
+	}
+
+	/* If this read fails, it means that emeta is corrupted. For now, leave
+	 * the line untouched. TODO: Implement a recovery routine that scans and
+	 * moves all sectors on the line.
+	 */
+	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
+	if (!lba_list) {
+		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+		goto fail_free_emeta;
+	}
+
+	line_ws->pblk = pblk;
+	line_ws->line = line;
+	line_ws->priv = lba_list;
+
+	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
+	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+
+	return 0;
+
+fail_free_emeta:
+	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+fail_free_ws:
+	mempool_free(line_ws, pblk->line_ws_pool);
+	pblk_put_line_back(pblk, line);
+
+	return 1;
+}
+
+static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+{
+	struct pblk_line *line, *tline;
+
+	list_for_each_entry_safe(line, tline, gc_list, list) {
+		if (pblk_gc_line(pblk, line))
+			pr_err("pblk: failed to GC line %d\n", line->id);
+		list_del(&line->list);
+	}
+}
+
+/*
+ * Lines with no valid sectors will be returned to the free list immediately. If
+ * GC is activated - either because the free block count is under the determined
+ * threshold, or because it is being forced from user space - only lines with a
+ * high count of invalid sectors will be recycled.
+ */
+static void pblk_gc_run(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line, *tline;
+	unsigned int nr_blocks_free, nr_blocks_need;
+	struct list_head *group_list;
+	int run_gc, gc_group = 0;
+	int prev_gc = 0;
+	int inflight_gc = atomic_read(&gc->inflight_gc);
+	LIST_HEAD(gc_list);
+
+	spin_lock(&l_mg->gc_lock);
+	list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
+		spin_lock(&line->lock);
+		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
+		line->state = PBLK_LINESTATE_GC;
+		spin_unlock(&line->lock);
+
+		list_del(&line->list);
+		kref_put(&line->ref, pblk_line_put);
+	}
+	spin_unlock(&l_mg->gc_lock);
+
+	nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
+	nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
+	run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+
+next_gc_group:
+	group_list = l_mg->gc_lists[gc_group++];
+	spin_lock(&l_mg->gc_lock);
+	while (run_gc && !list_empty(group_list)) {
+		/* No need to queue up more GC lines than we can handle */
+		if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+			spin_unlock(&l_mg->gc_lock);
+			pblk_gc_lines(pblk, &gc_list);
+			return;
+		}
+
+		line = list_first_entry(group_list, struct pblk_line, list);
+		nr_blocks_free += atomic_read(&line->blk_in_line);
+
+		spin_lock(&line->lock);
+		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
+		line->state = PBLK_LINESTATE_GC;
+		list_move_tail(&line->list, &gc_list);
+		atomic_inc(&gc->inflight_gc);
+		inflight_gc++;
+		spin_unlock(&line->lock);
+
+		prev_gc = 1;
+		run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+	}
+	spin_unlock(&l_mg->gc_lock);
+
+	pblk_gc_lines(pblk, &gc_list);
+
+	if (!prev_gc && pblk->rl.rb_state > gc_group &&
+						gc_group < PBLK_NR_GC_LISTS)
+		goto next_gc_group;
+}
+
+
+static void pblk_gc_kick(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	wake_up_process(gc->gc_ts);
+	pblk_gc_writer_kick(gc);
+	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
+}
+
+static void pblk_gc_timer(unsigned long data)
+{
+	struct pblk *pblk = (struct pblk *)data;
+
+	pblk_gc_kick(pblk);
+}
+
+static int pblk_gc_ts(void *data)
+{
+	struct pblk *pblk = data;
+
+	while (!kthread_should_stop()) {
+		pblk_gc_run(pblk);
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
+}
+
+static int pblk_gc_writer_ts(void *data)
+{
+	struct pblk *pblk = data;
+
+	while (!kthread_should_stop()) {
+		if (!pblk_gc_write(pblk))
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
+}
+
+static void pblk_gc_start(struct pblk *pblk)
+{
+	pblk->gc.gc_active = 1;
+
+	pr_debug("pblk: gc start\n");
+}
+
+int pblk_gc_status(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	int ret;
+
+	spin_lock(&gc->lock);
+	ret = gc->gc_active;
+	spin_unlock(&gc->lock);
+
+	return ret;
+}
+
+static void __pblk_gc_should_start(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	lockdep_assert_held(&gc->lock);
+
+	if (gc->gc_enabled && !gc->gc_active)
+		pblk_gc_start(pblk);
+}
+
+void pblk_gc_should_start(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	spin_lock(&gc->lock);
+	__pblk_gc_should_start(pblk);
+	spin_unlock(&gc->lock);
+}
+
+/*
+ * If flush_wq == 1 then no lock should be held by the caller since
+ * flush_workqueue can sleep
+ */
+static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
+{
+	spin_lock(&pblk->gc.lock);
+	pblk->gc.gc_active = 0;
+	spin_unlock(&pblk->gc.lock);
+
+	pr_debug("pblk: gc stop\n");
+}
+
+void pblk_gc_should_stop(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	if (gc->gc_active && !gc->gc_forced)
+		pblk_gc_stop(pblk, 0);
+}
+
+void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
+			      int *gc_active)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	spin_lock(&gc->lock);
+	*gc_enabled = gc->gc_enabled;
+	*gc_active = gc->gc_active;
+	spin_unlock(&gc->lock);
+}
+
+void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	int rsv = 0;
+
+	spin_lock(&gc->lock);
+	if (force) {
+		gc->gc_enabled = 1;
+		rsv = 64;
+	}
+	pblk_rl_set_gc_rsc(&pblk->rl, rsv);
+	gc->gc_forced = force;
+	__pblk_gc_should_start(pblk);
+	spin_unlock(&gc->lock);
+}
+
+int pblk_gc_init(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	int ret;
+
+	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
+	if (IS_ERR(gc->gc_ts)) {
+		pr_err("pblk: could not allocate GC main kthread\n");
+		return PTR_ERR(gc->gc_ts);
+	}
+
+	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
+							"pblk-gc-writer-ts");
+	if (IS_ERR(gc->gc_writer_ts)) {
+		pr_err("pblk: could not allocate GC writer kthread\n");
+		ret = PTR_ERR(gc->gc_writer_ts);
+		goto fail_free_main_kthread;
+	}
+
+	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
+	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
+
+	gc->gc_active = 0;
+	gc->gc_forced = 0;
+	gc->gc_enabled = 1;
+	gc->gc_jobs_active = 8;
+	gc->w_entries = 0;
+	atomic_set(&gc->inflight_gc, 0);
+
+	gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+	if (!gc->gc_reader_wq) {
+		pr_err("pblk: could not allocate GC reader workqueue\n");
+		ret = -ENOMEM;
+		goto fail_free_writer_kthread;
+	}
+
+	spin_lock_init(&gc->lock);
+	spin_lock_init(&gc->w_lock);
+	INIT_LIST_HEAD(&gc->w_list);
+
+	return 0;
+
+fail_free_writer_kthread:
+	kthread_stop(gc->gc_writer_ts);
+fail_free_main_kthread:
+	kthread_stop(gc->gc_ts);
+
+	return ret;
+}
+
+void pblk_gc_exit(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+
+	flush_workqueue(gc->gc_reader_wq);
+
+	del_timer(&gc->gc_timer);
+	pblk_gc_stop(pblk, 1);
+
+	if (gc->gc_ts)
+		kthread_stop(gc->gc_ts);
+
+	if (pblk->gc.gc_reader_wq)
+		destroy_workqueue(pblk->gc.gc_reader_wq);
+
+	if (gc->gc_writer_ts)
+		kthread_stop(gc->gc_writer_ts);
+}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
new file mode 100644
index 000000000000..ae8cd6d5af8b
--- /dev/null
+++ b/drivers/lightnvm/pblk-init.c
@@ -0,0 +1,962 @@
+/*
+ * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a physical block-device target for Open-channel SSDs.
+ *
+ * pblk-init.c - pblk's initialization.
+ */
+
+#include "pblk.h"
+
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
+					*pblk_w_rq_cache, *pblk_line_meta_cache;
+static DECLARE_RWSEM(pblk_lock);
+
+static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
+			  struct bio *bio)
+{
+	int ret;
+
+	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
+	 * constraint. Writes can be of arbitrary size.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		blk_queue_split(q, &bio, q->bio_split);
+		ret = pblk_submit_read(pblk, bio);
+		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
+			bio_put(bio);
+
+		return ret;
+	}
+
+	/* Prevent deadlock in the case of a modest LUN configuration and large
+	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
+	 * available for user I/O.
+	 */
+	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
+		blk_queue_split(q, &bio, q->bio_split);
+
+	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
+}
+
+static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
+{
+	struct pblk *pblk = q->queuedata;
+
+	if (bio_op(bio) == REQ_OP_DISCARD) {
+		pblk_discard(pblk, bio);
+		if (!(bio->bi_opf & REQ_PREFLUSH)) {
+			bio_endio(bio);
+			return BLK_QC_T_NONE;
+		}
+	}
+
+	switch (pblk_rw_io(q, pblk, bio)) {
+	case NVM_IO_ERR:
+		bio_io_error(bio);
+		break;
+	case NVM_IO_DONE:
+		bio_endio(bio);
+		break;
+	}
+
+	return BLK_QC_T_NONE;
+}
+
+static void pblk_l2p_free(struct pblk *pblk)
+{
+	vfree(pblk->trans_map);
+}
+
+static int pblk_l2p_init(struct pblk *pblk)
+{
+	sector_t i;
+	struct ppa_addr ppa;
+	int entry_size = 8;
+
+	if (pblk->ppaf_bitsize < 32)
+		entry_size = 4;
+
+	pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
+	if (!pblk->trans_map)
+		return -ENOMEM;
+
+	pblk_ppa_set_empty(&ppa);
+
+	for (i = 0; i < pblk->rl.nr_secs; i++)
+		pblk_trans_map_set(pblk, i, ppa);
+
+	return 0;
+}
+
+static void pblk_rwb_free(struct pblk *pblk)
+{
+	if (pblk_rb_tear_down_check(&pblk->rwb))
+		pr_err("pblk: write buffer error on tear down\n");
+
+	pblk_rb_data_free(&pblk->rwb);
+	vfree(pblk_rb_entries_ref(&pblk->rwb));
+}
+
+static int pblk_rwb_init(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_rb_entry *entries;
+	unsigned long nr_entries;
+	unsigned int power_size, power_seg_sz;
+
+	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
+
+	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
+	if (!entries)
+		return -ENOMEM;
+
+	power_size = get_count_order(nr_entries);
+	power_seg_sz = get_count_order(geo->sec_size);
+
+	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
+}
+
+/* Minimum pages needed within a lun */
+#define PAGE_POOL_SIZE 16
+#define ADDR_POOL_SIZE 64
+
+static int pblk_set_ppaf(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct nvm_addr_format ppaf = geo->ppaf;
+	int power_len;
+
+	/* Re-calculate channel and lun format to adapt to configuration */
+	power_len = get_count_order(geo->nr_chnls);
+	if (1 << power_len != geo->nr_chnls) {
+		pr_err("pblk: supports only power-of-two channel config.\n");
+		return -EINVAL;
+	}
+	ppaf.ch_len = power_len;
+
+	power_len = get_count_order(geo->luns_per_chnl);
+	if (1 << power_len != geo->luns_per_chnl) {
+		pr_err("pblk: supports only power-of-two LUN config.\n");
+		return -EINVAL;
+	}
+	ppaf.lun_len = power_len;
+
+	pblk->ppaf.sec_offset = 0;
+	pblk->ppaf.pln_offset = ppaf.sect_len;
+	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
+	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
+	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
+	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
+	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
+	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
+							pblk->ppaf.pln_offset;
+	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
+							pblk->ppaf.ch_offset;
+	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
+							pblk->ppaf.lun_offset;
+	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
+							pblk->ppaf.pg_offset;
+	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
+							pblk->ppaf.blk_offset;
+
+	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
+
+	return 0;
+}
+
+static int pblk_init_global_caches(struct pblk *pblk)
+{
+	char cache_name[PBLK_CACHE_NAME_LEN];
+
+	down_write(&pblk_lock);
+	pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
+				sizeof(struct pblk_line_ws), 0, 0, NULL);
+	if (!pblk_blk_ws_cache) {
+		up_write(&pblk_lock);
+		return -ENOMEM;
+	}
+
+	pblk_rec_cache = kmem_cache_create("pblk_rec",
+				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
+	if (!pblk_rec_cache) {
+		kmem_cache_destroy(pblk_blk_ws_cache);
+		up_write(&pblk_lock);
+		return -ENOMEM;
+	}
+
+	pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+				0, 0, NULL);
+	if (!pblk_r_rq_cache) {
+		kmem_cache_destroy(pblk_blk_ws_cache);
+		kmem_cache_destroy(pblk_rec_cache);
+		up_write(&pblk_lock);
+		return -ENOMEM;
+	}
+
+	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
+				0, 0, NULL);
+	if (!pblk_w_rq_cache) {
+		kmem_cache_destroy(pblk_blk_ws_cache);
+		kmem_cache_destroy(pblk_rec_cache);
+		kmem_cache_destroy(pblk_r_rq_cache);
+		up_write(&pblk_lock);
+		return -ENOMEM;
+	}
+
+	snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
+							pblk->disk->disk_name);
+	pblk_line_meta_cache = kmem_cache_create(cache_name,
+				pblk->lm.sec_bitmap_len, 0, 0, NULL);
+	if (!pblk_line_meta_cache) {
+		kmem_cache_destroy(pblk_blk_ws_cache);
+		kmem_cache_destroy(pblk_rec_cache);
+		kmem_cache_destroy(pblk_r_rq_cache);
+		kmem_cache_destroy(pblk_w_rq_cache);
+		up_write(&pblk_lock);
+		return -ENOMEM;
+	}
+	up_write(&pblk_lock);
+
+	return 0;
+}
+
+static int pblk_core_init(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int max_write_ppas;
+	int mod;
+
+	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+				max_write_ppas : nvm_max_phys_sects(dev);
+	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
+						geo->nr_planes * geo->nr_luns;
+
+	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+		pr_err("pblk: cannot support device max_phys_sect\n");
+		return -EINVAL;
+	}
+
+	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+	if (mod) {
+		pr_err("pblk: bad configuration of sectors/pages\n");
+		return -EINVAL;
+	}
+
+	if (pblk_init_global_caches(pblk))
+		return -ENOMEM;
+
+	pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
+	if (!pblk->page_pool)
+		return -ENOMEM;
+
+	pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+							pblk_blk_ws_cache);
+	if (!pblk->line_ws_pool)
+		goto free_page_pool;
+
+	pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
+	if (!pblk->rec_pool)
+		goto free_blk_ws_pool;
+
+	pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
+	if (!pblk->r_rq_pool)
+		goto free_rec_pool;
+
+	pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+	if (!pblk->w_rq_pool)
+		goto free_r_rq_pool;
+
+	pblk->line_meta_pool =
+			mempool_create_slab_pool(16, pblk_line_meta_cache);
+	if (!pblk->line_meta_pool)
+		goto free_w_rq_pool;
+
+	pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
+					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!pblk->kw_wq)
+		goto free_line_meta_pool;
+
+	if (pblk_set_ppaf(pblk))
+		goto free_kw_wq;
+
+	if (pblk_rwb_init(pblk))
+		goto free_kw_wq;
+
+	INIT_LIST_HEAD(&pblk->compl_list);
+	return 0;
+
+free_kw_wq:
+	destroy_workqueue(pblk->kw_wq);
+free_line_meta_pool:
+	mempool_destroy(pblk->line_meta_pool);
+free_w_rq_pool:
+	mempool_destroy(pblk->w_rq_pool);
+free_r_rq_pool:
+	mempool_destroy(pblk->r_rq_pool);
+free_rec_pool:
+	mempool_destroy(pblk->rec_pool);
+free_blk_ws_pool:
+	mempool_destroy(pblk->line_ws_pool);
+free_page_pool:
+	mempool_destroy(pblk->page_pool);
+	return -ENOMEM;
+}
+
+static void pblk_core_free(struct pblk *pblk)
+{
+	if (pblk->kw_wq)
+		destroy_workqueue(pblk->kw_wq);
+
+	mempool_destroy(pblk->page_pool);
+	mempool_destroy(pblk->line_ws_pool);
+	mempool_destroy(pblk->rec_pool);
+	mempool_destroy(pblk->r_rq_pool);
+	mempool_destroy(pblk->w_rq_pool);
+	mempool_destroy(pblk->line_meta_pool);
+
+	kmem_cache_destroy(pblk_blk_ws_cache);
+	kmem_cache_destroy(pblk_rec_cache);
+	kmem_cache_destroy(pblk_r_rq_cache);
+	kmem_cache_destroy(pblk_w_rq_cache);
+	kmem_cache_destroy(pblk_line_meta_cache);
+}
+
+static void pblk_luns_free(struct pblk *pblk)
+{
+	kfree(pblk->luns);
+}
+
+static void pblk_lines_free(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line;
+	int i;
+
+	spin_lock(&l_mg->free_lock);
+	for (i = 0; i < l_mg->nr_lines; i++) {
+		line = &pblk->lines[i];
+
+		pblk_line_free(pblk, line);
+		kfree(line->blk_bitmap);
+		kfree(line->erase_bitmap);
+	}
+	spin_unlock(&l_mg->free_lock);
+}
+
+static void pblk_line_meta_free(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int i;
+
+	kfree(l_mg->bb_template);
+	kfree(l_mg->bb_aux);
+
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
+		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+	}
+
+	kfree(pblk->lines);
+}
+
+static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
+{
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr ppa;
+	u8 *blks;
+	int nr_blks, ret;
+
+	nr_blks = geo->blks_per_lun * geo->plane_mode;
+	blks = kmalloc(nr_blks, GFP_KERNEL);
+	if (!blks)
+		return -ENOMEM;
+
+	ppa.ppa = 0;
+	ppa.g.ch = rlun->bppa.g.ch;
+	ppa.g.lun = rlun->bppa.g.lun;
+
+	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
+	if (ret)
+		goto out;
+
+	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
+	if (nr_blks < 0) {
+		ret = nr_blks;
+		goto out;
+	}
+
+	rlun->bb_list = blks;
+
+	return 0;
+out:
+	kfree(blks);
+	return ret;
+}
+
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_lun *rlun;
+	int bb_cnt = 0;
+	int i;
+
+	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+	if (!line->blk_bitmap)
+		return -ENOMEM;
+
+	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+	if (!line->erase_bitmap) {
+		kfree(line->blk_bitmap);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < lm->blk_per_line; i++) {
+		rlun = &pblk->luns[i];
+		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+			continue;
+
+		set_bit(i, line->blk_bitmap);
+		bb_cnt++;
+	}
+
+	return bb_cnt;
+}
+
+static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int i, ret;
+
+	/* TODO: Implement unbalanced LUN support */
+	if (geo->luns_per_chnl < 0) {
+		pr_err("pblk: unbalanced LUN config.\n");
+		return -EINVAL;
+	}
+
+	pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
+	if (!pblk->luns)
+		return -ENOMEM;
+
+	for (i = 0; i < geo->nr_luns; i++) {
+		/* Stripe across channels */
+		int ch = i % geo->nr_chnls;
+		int lun_raw = i / geo->nr_chnls;
+		int lunid = lun_raw + ch * geo->luns_per_chnl;
+
+		rlun = &pblk->luns[i];
+		rlun->bppa = luns[lunid];
+
+		sema_init(&rlun->wr_sem, 1);
+
+		ret = pblk_bb_discovery(dev, rlun);
+		if (ret) {
+			while (--i >= 0)
+				kfree(pblk->luns[i].bb_list);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int pblk_lines_configure(struct pblk *pblk, int flags)
+{
+	struct pblk_line *line = NULL;
+	int ret = 0;
+
+	if (!(flags & NVM_TARGET_FACTORY)) {
+		line = pblk_recov_l2p(pblk);
+		if (IS_ERR(line)) {
+			pr_err("pblk: could not recover l2p table\n");
+			ret = -EFAULT;
+		}
+	}
+
+	if (!line) {
+		/* Configure next line for user data */
+		line = pblk_line_get_first_data(pblk);
+		if (!line) {
+			pr_err("pblk: line list corrupted\n");
+			ret = -EFAULT;
+		}
+	}
+
+	return ret;
+}
+
+/* See comment over struct line_emeta definition */
+static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+{
+	return (sizeof(struct line_emeta) +
+			((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
+			(pblk->l_mg.nr_lines * sizeof(u32)) +
+			lm->blk_bitmap_len);
+}
+
+static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	sector_t provisioned;
+
+	pblk->over_pct = 20;
+
+	provisioned = nr_free_blks;
+	provisioned *= (100 - pblk->over_pct);
+	sector_div(provisioned, 100);
+
+	/* Internally pblk manages all free blocks, but all calculations based
+	 * on user capacity consider only provisioned blocks
+	 */
+	pblk->rl.total_blocks = nr_free_blks;
+	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
+	pblk->capacity = provisioned * geo->sec_per_blk;
+	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
+}
+
+static int pblk_lines_init(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *line;
+	unsigned int smeta_len, emeta_len;
+	long nr_bad_blks, nr_meta_blks, nr_free_blks;
+	int bb_distance;
+	int i;
+	int ret;
+
+	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
+	lm->blk_per_line = geo->nr_luns;
+	lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
+	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
+	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
+	lm->high_thrs = lm->sec_per_line / 2;
+	lm->mid_thrs = lm->sec_per_line / 4;
+
+	/* Calculate necessary pages for smeta. See comment over struct
+	 * line_smeta definition
+	 */
+	lm->smeta_len = sizeof(struct line_smeta) +
+				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+
+	i = 1;
+add_smeta_page:
+	lm->smeta_sec = i * geo->sec_per_pl;
+	lm->smeta_len = lm->smeta_sec * geo->sec_size;
+
+	smeta_len = sizeof(struct line_smeta) +
+				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+	if (smeta_len > lm->smeta_len) {
+		i++;
+		goto add_smeta_page;
+	}
+
+	/* Calculate necessary pages for emeta. See comment over struct
+	 * line_emeta definition
+	 */
+	i = 1;
+add_emeta_page:
+	lm->emeta_sec = i * geo->sec_per_pl;
+	lm->emeta_len = lm->emeta_sec * geo->sec_size;
+
+	emeta_len = calc_emeta_len(pblk, lm);
+	if (emeta_len > lm->emeta_len) {
+		i++;
+		goto add_emeta_page;
+	}
+	lm->emeta_bb = geo->nr_luns - i;
+
+	nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
+				(geo->sec_per_blk / 2)) / geo->sec_per_blk;
+	lm->min_blk_line = nr_meta_blks + 1;
+
+	l_mg->nr_lines = geo->blks_per_lun;
+	l_mg->log_line = l_mg->data_line = NULL;
+	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+	l_mg->nr_free_lines = 0;
+	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+
+	/* smeta is always small enough to fit on a kmalloc memory allocation,
+	 * emeta depends on the number of LUNs allocated to the pblk instance
+	 */
+	l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
+		if (!l_mg->sline_meta[i].meta)
+			while (--i >= 0) {
+				kfree(l_mg->sline_meta[i].meta);
+				ret = -ENOMEM;
+				goto fail;
+			}
+	}
+
+	if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
+		l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+		for (i = 0; i < PBLK_DATA_LINES; i++) {
+			l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
+			if (!l_mg->eline_meta[i].meta)
+				while (--i >= 0) {
+					vfree(l_mg->eline_meta[i].meta);
+					ret = -ENOMEM;
+					goto fail;
+				}
+		}
+	} else {
+		l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+		for (i = 0; i < PBLK_DATA_LINES; i++) {
+			l_mg->eline_meta[i].meta =
+					kmalloc(lm->emeta_len, GFP_KERNEL);
+			if (!l_mg->eline_meta[i].meta)
+				while (--i >= 0) {
+					kfree(l_mg->eline_meta[i].meta);
+					ret = -ENOMEM;
+					goto fail;
+				}
+		}
+	}
+
+	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!l_mg->bb_template) {
+		ret = -ENOMEM;
+		goto fail_free_meta;
+	}
+
+	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!l_mg->bb_aux) {
+		ret = -ENOMEM;
+		goto fail_free_bb_template;
+	}
+
+	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
+	for (i = 0; i < lm->sec_per_line; i += bb_distance)
+		bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
+
+	INIT_LIST_HEAD(&l_mg->free_list);
+	INIT_LIST_HEAD(&l_mg->corrupt_list);
+	INIT_LIST_HEAD(&l_mg->bad_list);
+	INIT_LIST_HEAD(&l_mg->gc_full_list);
+	INIT_LIST_HEAD(&l_mg->gc_high_list);
+	INIT_LIST_HEAD(&l_mg->gc_mid_list);
+	INIT_LIST_HEAD(&l_mg->gc_low_list);
+	INIT_LIST_HEAD(&l_mg->gc_empty_list);
+
+	l_mg->gc_lists[0] = &l_mg->gc_high_list;
+	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
+	l_mg->gc_lists[2] = &l_mg->gc_low_list;
+
+	spin_lock_init(&l_mg->free_lock);
+	spin_lock_init(&l_mg->gc_lock);
+
+	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
+								GFP_KERNEL);
+	if (!pblk->lines) {
+		ret = -ENOMEM;
+		goto fail_free_bb_aux;
+	}
+
+	nr_free_blks = 0;
+	for (i = 0; i < l_mg->nr_lines; i++) {
+		int blk_in_line;
+
+		line = &pblk->lines[i];
+
+		line->pblk = pblk;
+		line->id = i;
+		line->type = PBLK_LINETYPE_FREE;
+		line->state = PBLK_LINESTATE_FREE;
+		line->gc_group = PBLK_LINEGC_NONE;
+		spin_lock_init(&line->lock);
+
+		nr_bad_blks = pblk_bb_line(pblk, line);
+		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+			ret = -EINVAL;
+			goto fail_free_lines;
+		}
+
+		blk_in_line = lm->blk_per_line - nr_bad_blks;
+		if (blk_in_line < lm->min_blk_line) {
+			line->state = PBLK_LINESTATE_BAD;
+			list_add_tail(&line->list, &l_mg->bad_list);
+			continue;
+		}
+
+		nr_free_blks += blk_in_line;
+		atomic_set(&line->blk_in_line, blk_in_line);
+
+		l_mg->nr_free_lines++;
+		list_add_tail(&line->list, &l_mg->free_list);
+	}
+
+	pblk_set_provision(pblk, nr_free_blks);
+
+	sema_init(&pblk->erase_sem, 1);
+
+	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
+	for (i = 0; i < geo->nr_luns; i++)
+		kfree(pblk->luns[i].bb_list);
+
+	return 0;
+fail_free_lines:
+	kfree(pblk->lines);
+fail_free_bb_aux:
+	kfree(l_mg->bb_aux);
+fail_free_bb_template:
+	kfree(l_mg->bb_template);
+fail_free_meta:
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
+		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+	}
+fail:
+	for (i = 0; i < geo->nr_luns; i++)
+		kfree(pblk->luns[i].bb_list);
+
+	return ret;
+}
+
+static int pblk_writer_init(struct pblk *pblk)
+{
+	setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
+	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
+
+	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
+	if (IS_ERR(pblk->writer_ts)) {
+		pr_err("pblk: could not allocate writer kthread\n");
+		return PTR_ERR(pblk->writer_ts);
+	}
+
+	return 0;
+}
+
+static void pblk_writer_stop(struct pblk *pblk)
+{
+	if (pblk->writer_ts)
+		kthread_stop(pblk->writer_ts);
+	del_timer(&pblk->wtimer);
+}
+
+static void pblk_free(struct pblk *pblk)
+{
+	pblk_luns_free(pblk);
+	pblk_lines_free(pblk);
+	pblk_line_meta_free(pblk);
+	pblk_core_free(pblk);
+	pblk_l2p_free(pblk);
+
+	kfree(pblk);
+}
+
+static void pblk_tear_down(struct pblk *pblk)
+{
+	pblk_flush_writer(pblk);
+	pblk_writer_stop(pblk);
+	pblk_rb_sync_l2p(&pblk->rwb);
+	pblk_recov_pad(pblk);
+	pblk_rwb_free(pblk);
+	pblk_rl_free(&pblk->rl);
+
+	pr_debug("pblk: consistent tear down\n");
+}
+
+static void pblk_exit(void *private)
+{
+	struct pblk *pblk = private;
+
+	down_write(&pblk_lock);
+	pblk_gc_exit(pblk);
+	pblk_tear_down(pblk);
+	pblk_free(pblk);
+	up_write(&pblk_lock);
+}
+
+static sector_t pblk_capacity(void *private)
+{
+	struct pblk *pblk = private;
+
+	return pblk->capacity * NR_PHY_IN_LOG;
+}
+
+static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
+		       int flags)
+{
+	struct nvm_geo *geo = &dev->geo;
+	struct request_queue *bqueue = dev->q;
+	struct request_queue *tqueue = tdisk->queue;
+	struct pblk *pblk;
+	int ret;
+
+	if (dev->identity.dom & NVM_RSP_L2P) {
+		pr_err("pblk: device-side L2P table not supported. (%x)\n",
+							dev->identity.dom);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
+	if (!pblk)
+		return ERR_PTR(-ENOMEM);
+
+	pblk->dev = dev;
+	pblk->disk = tdisk;
+
+	spin_lock_init(&pblk->trans_lock);
+	spin_lock_init(&pblk->lock);
+
+	if (flags & NVM_TARGET_FACTORY)
+		pblk_setup_uuid(pblk);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_set(&pblk->inflight_writes, 0);
+	atomic_long_set(&pblk->padded_writes, 0);
+	atomic_long_set(&pblk->padded_wb, 0);
+	atomic_long_set(&pblk->nr_flush, 0);
+	atomic_long_set(&pblk->req_writes, 0);
+	atomic_long_set(&pblk->sub_writes, 0);
+	atomic_long_set(&pblk->sync_writes, 0);
+	atomic_long_set(&pblk->compl_writes, 0);
+	atomic_long_set(&pblk->inflight_reads, 0);
+	atomic_long_set(&pblk->sync_reads, 0);
+	atomic_long_set(&pblk->recov_writes, 0);
+	atomic_long_set(&pblk->recov_writes, 0);
+	atomic_long_set(&pblk->recov_gc_writes, 0);
+#endif
+
+	atomic_long_set(&pblk->read_failed, 0);
+	atomic_long_set(&pblk->read_empty, 0);
+	atomic_long_set(&pblk->read_high_ecc, 0);
+	atomic_long_set(&pblk->read_failed_gc, 0);
+	atomic_long_set(&pblk->write_failed, 0);
+	atomic_long_set(&pblk->erase_failed, 0);
+
+	ret = pblk_luns_init(pblk, dev->luns);
+	if (ret) {
+		pr_err("pblk: could not initialize luns\n");
+		goto fail;
+	}
+
+	ret = pblk_lines_init(pblk);
+	if (ret) {
+		pr_err("pblk: could not initialize lines\n");
+		goto fail_free_luns;
+	}
+
+	ret = pblk_core_init(pblk);
+	if (ret) {
+		pr_err("pblk: could not initialize core\n");
+		goto fail_free_line_meta;
+	}
+
+	ret = pblk_l2p_init(pblk);
+	if (ret) {
+		pr_err("pblk: could not initialize maps\n");
+		goto fail_free_core;
+	}
+
+	ret = pblk_lines_configure(pblk, flags);
+	if (ret) {
+		pr_err("pblk: could not configure lines\n");
+		goto fail_free_l2p;
+	}
+
+	ret = pblk_writer_init(pblk);
+	if (ret) {
+		pr_err("pblk: could not initialize write thread\n");
+		goto fail_free_lines;
+	}
+
+	ret = pblk_gc_init(pblk);
+	if (ret) {
+		pr_err("pblk: could not initialize gc\n");
+		goto fail_stop_writer;
+	}
+
+	/* inherit the size from the underlying device */
+	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
+	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
+
+	blk_queue_write_cache(tqueue, true, false);
+
+	tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
+	tqueue->limits.discard_alignment = 0;
+	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
+
+	pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
+			geo->nr_luns, pblk->l_mg.nr_lines,
+			(unsigned long long)pblk->rl.nr_secs,
+			pblk->rwb.nr_entries);
+
+	wake_up_process(pblk->writer_ts);
+	return pblk;
+
+fail_stop_writer:
+	pblk_writer_stop(pblk);
+fail_free_lines:
+	pblk_lines_free(pblk);
+fail_free_l2p:
+	pblk_l2p_free(pblk);
+fail_free_core:
+	pblk_core_free(pblk);
+fail_free_line_meta:
+	pblk_line_meta_free(pblk);
+fail_free_luns:
+	pblk_luns_free(pblk);
+fail:
+	kfree(pblk);
+	return ERR_PTR(ret);
+}
+
+/* physical block device target */
+static struct nvm_tgt_type tt_pblk = {
+	.name		= "pblk",
+	.version	= {1, 0, 0},
+
+	.make_rq	= pblk_make_rq,
+	.capacity	= pblk_capacity,
+
+	.init		= pblk_init,
+	.exit		= pblk_exit,
+
+	.sysfs_init	= pblk_sysfs_init,
+	.sysfs_exit	= pblk_sysfs_exit,
+};
+
+static int __init pblk_module_init(void)
+{
+	return nvm_register_tgt_type(&tt_pblk);
+}
+
+static void pblk_module_exit(void)
+{
+	nvm_unregister_tgt_type(&tt_pblk);
+}
+
+module_init(pblk_module_init);
+module_exit(pblk_module_exit);
+MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
+MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
new file mode 100644
index 000000000000..17c16955284d
--- /dev/null
+++ b/drivers/lightnvm/pblk-map.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-map.c - pblk's lba-ppa mapping strategy
+ *
+ */
+
+#include "pblk.h"
+
+static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
+			       struct ppa_addr *ppa_list,
+			       unsigned long *lun_bitmap,
+			       struct pblk_sec_meta *meta_list,
+			       unsigned int valid_secs)
+{
+	struct pblk_line *line = pblk_line_get_data(pblk);
+	struct line_emeta *emeta = line->emeta;
+	struct pblk_w_ctx *w_ctx;
+	__le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+	u64 paddr;
+	int nr_secs = pblk->min_write_pgs;
+	int i;
+
+	paddr = pblk_alloc_page(pblk, line, nr_secs);
+
+	for (i = 0; i < nr_secs; i++, paddr++) {
+		/* ppa to be sent to the device */
+		ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+
+		/* Write context for target bio completion on write buffer. Note
+		 * that the write buffer is protected by the sync backpointer,
+		 * and a single writer thread have access to each specific entry
+		 * at a time. Thus, it is safe to modify the context for the
+		 * entry we are setting up for submission without taking any
+		 * lock or memory barrier.
+		 */
+		if (i < valid_secs) {
+			kref_get(&line->ref);
+			w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
+			w_ctx->ppa = ppa_list[i];
+			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
+			le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+		} else {
+			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+			pblk_map_pad_invalidate(pblk, line, paddr);
+		}
+	}
+
+	if (pblk_line_is_full(line)) {
+		line = pblk_line_replace_data(pblk);
+		if (!line)
+			return;
+	}
+
+	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
+}
+
+void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+		 unsigned long *lun_bitmap, unsigned int valid_secs,
+		 unsigned int off)
+{
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	unsigned int map_secs;
+	int min = pblk->min_write_pgs;
+	int i;
+
+	for (i = off; i < rqd->nr_ppas; i += min) {
+		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
+		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs);
+	}
+}
+
+/* only if erase_ppa is set, acquire erase semaphore */
+void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+		       unsigned int sentry, unsigned long *lun_bitmap,
+		       unsigned int valid_secs, struct ppa_addr *erase_ppa)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	unsigned int map_secs;
+	int min = pblk->min_write_pgs;
+	int i, erase_lun;
+
+	for (i = 0; i < rqd->nr_ppas; i += min) {
+		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
+		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs);
+
+		erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
+							rqd->ppa_list[i].g.ch;
+
+		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
+			if (down_trylock(&pblk->erase_sem))
+				continue;
+
+			set_bit(erase_lun, e_line->erase_bitmap);
+			atomic_dec(&e_line->left_eblks);
+			*erase_ppa = rqd->ppa_list[i];
+			erase_ppa->g.blk = e_line->id;
+
+			/* Avoid evaluating e_line->left_eblks */
+			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+							valid_secs, i + min);
+		}
+	}
+
+	/* Erase blocks that are bad in this line but might not be in next */
+	if (unlikely(ppa_empty(*erase_ppa))) {
+		struct pblk_line_meta *lm = &pblk->lm;
+
+		i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
+		if (i == lm->blk_per_line)
+			return;
+
+		set_bit(i, e_line->erase_bitmap);
+		atomic_dec(&e_line->left_eblks);
+		*erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+		erase_ppa->g.blk = e_line->id;
+	}
+}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
new file mode 100644
index 000000000000..045384ddc1f9
--- /dev/null
+++ b/drivers/lightnvm/pblk-rb.c
@@ -0,0 +1,852 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * Based upon the circular ringbuffer.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-rb.c - pblk's write buffer
+ */
+
+#include <linux/circ_buf.h>
+
+#include "pblk.h"
+
+static DECLARE_RWSEM(pblk_rb_lock);
+
+void pblk_rb_data_free(struct pblk_rb *rb)
+{
+	struct pblk_rb_pages *p, *t;
+
+	down_write(&pblk_rb_lock);
+	list_for_each_entry_safe(p, t, &rb->pages, list) {
+		free_pages((unsigned long)page_address(p->pages), p->order);
+		list_del(&p->list);
+		kfree(p);
+	}
+	up_write(&pblk_rb_lock);
+}
+
+/*
+ * Initialize ring buffer. The data and metadata buffers must be previously
+ * allocated and their size must be a power of two
+ * (Documentation/circular-buffers.txt)
+ */
+int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
+		 unsigned int power_size, unsigned int power_seg_sz)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	unsigned int init_entry = 0;
+	unsigned int alloc_order = power_size;
+	unsigned int max_order = MAX_ORDER - 1;
+	unsigned int order, iter;
+
+	down_write(&pblk_rb_lock);
+	rb->entries = rb_entry_base;
+	rb->seg_size = (1 << power_seg_sz);
+	rb->nr_entries = (1 << power_size);
+	rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
+	rb->sync_point = EMPTY_ENTRY;
+
+	spin_lock_init(&rb->w_lock);
+	spin_lock_init(&rb->s_lock);
+
+	INIT_LIST_HEAD(&rb->pages);
+
+	if (alloc_order >= max_order) {
+		order = max_order;
+		iter = (1 << (alloc_order - max_order));
+	} else {
+		order = alloc_order;
+		iter = 1;
+	}
+
+	do {
+		struct pblk_rb_entry *entry;
+		struct pblk_rb_pages *page_set;
+		void *kaddr;
+		unsigned long set_size;
+		int i;
+
+		page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
+		if (!page_set) {
+			up_write(&pblk_rb_lock);
+			return -ENOMEM;
+		}
+
+		page_set->order = order;
+		page_set->pages = alloc_pages(GFP_KERNEL, order);
+		if (!page_set->pages) {
+			kfree(page_set);
+			pblk_rb_data_free(rb);
+			up_write(&pblk_rb_lock);
+			return -ENOMEM;
+		}
+		kaddr = page_address(page_set->pages);
+
+		entry = &rb->entries[init_entry];
+		entry->data = kaddr;
+		entry->cacheline = pblk_cacheline_to_addr(init_entry++);
+		entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
+
+		set_size = (1 << order);
+		for (i = 1; i < set_size; i++) {
+			entry = &rb->entries[init_entry];
+			entry->cacheline = pblk_cacheline_to_addr(init_entry++);
+			entry->data = kaddr + (i * rb->seg_size);
+			entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
+			bio_list_init(&entry->w_ctx.bios);
+		}
+
+		list_add_tail(&page_set->list, &rb->pages);
+		iter--;
+	} while (iter > 0);
+	up_write(&pblk_rb_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_set(&rb->inflight_sync_point, 0);
+#endif
+
+	/*
+	 * Initialize rate-limiter, which controls access to the write buffer
+	 * but user and GC I/O
+	 */
+	pblk_rl_init(&pblk->rl, rb->nr_entries);
+
+	return 0;
+}
+
+/*
+ * pblk_rb_calculate_size -- calculate the size of the write buffer
+ */
+unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
+{
+	/* Alloc a write buffer that can at least fit 128 entries */
+	return (1 << max(get_count_order(nr_entries), 7));
+}
+
+void *pblk_rb_entries_ref(struct pblk_rb *rb)
+{
+	return rb->entries;
+}
+
+static void clean_wctx(struct pblk_w_ctx *w_ctx)
+{
+	int flags;
+
+try:
+	flags = READ_ONCE(w_ctx->flags);
+	if (!(flags & PBLK_SUBMITTED_ENTRY))
+		goto try;
+
+	/* Release flags on context. Protect from writes and reads */
+	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
+	pblk_ppa_set_empty(&w_ctx->ppa);
+}
+
+#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
+#define pblk_rb_ring_space(rb, head, tail, size) \
+					(CIRC_SPACE(head, tail, size))
+
+/*
+ * Buffer space is calculated with respect to the back pointer signaling
+ * synchronized entries to the media.
+ */
+static unsigned int pblk_rb_space(struct pblk_rb *rb)
+{
+	unsigned int mem = READ_ONCE(rb->mem);
+	unsigned int sync = READ_ONCE(rb->sync);
+
+	return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
+}
+
+/*
+ * Buffer count is calculated with respect to the submission entry signaling the
+ * entries that are available to send to the media
+ */
+unsigned int pblk_rb_read_count(struct pblk_rb *rb)
+{
+	unsigned int mem = READ_ONCE(rb->mem);
+	unsigned int subm = READ_ONCE(rb->subm);
+
+	return pblk_rb_ring_count(mem, subm, rb->nr_entries);
+}
+
+unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
+{
+	unsigned int subm;
+
+	subm = READ_ONCE(rb->subm);
+	/* Commit read means updating submission pointer */
+	smp_store_release(&rb->subm,
+				(subm + nr_entries) & (rb->nr_entries - 1));
+
+	return subm;
+}
+
+static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
+				unsigned int to_update)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct pblk_line *line;
+	struct pblk_rb_entry *entry;
+	struct pblk_w_ctx *w_ctx;
+	unsigned int i;
+
+	for (i = 0; i < to_update; i++) {
+		entry = &rb->entries[*l2p_upd];
+		w_ctx = &entry->w_ctx;
+
+		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
+							entry->cacheline);
+
+		line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)];
+		kref_put(&line->ref, pblk_line_put);
+		clean_wctx(w_ctx);
+		*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
+	}
+
+	return 0;
+}
+
+/*
+ * When we move the l2p_update pointer, we update the l2p table - lookups will
+ * point to the physical address instead of to the cacheline in the write buffer
+ * from this moment on.
+ */
+static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
+			      unsigned int mem, unsigned int sync)
+{
+	unsigned int space, count;
+	int ret = 0;
+
+	lockdep_assert_held(&rb->w_lock);
+
+	/* Update l2p only as buffer entries are being overwritten */
+	space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
+	if (space > nr_entries)
+		goto out;
+
+	count = nr_entries - space;
+	/* l2p_update used exclusively under rb->w_lock */
+	ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count);
+
+out:
+	return ret;
+}
+
+/*
+ * Update the l2p entry for all sectors stored on the write buffer. This means
+ * that all future lookups to the l2p table will point to a device address, not
+ * to the cacheline in the write buffer.
+ */
+void pblk_rb_sync_l2p(struct pblk_rb *rb)
+{
+	unsigned int sync;
+	unsigned int to_update;
+
+	spin_lock(&rb->w_lock);
+
+	/* Protect from reads and writes */
+	sync = smp_load_acquire(&rb->sync);
+
+	to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
+	__pblk_rb_update_l2p(rb, &rb->l2p_update, to_update);
+
+	spin_unlock(&rb->w_lock);
+}
+
+/*
+ * Write @nr_entries to ring buffer from @data buffer if there is enough space.
+ * Typically, 4KB data chunks coming from a bio will be copied to the ring
+ * buffer, thus the write will fail if not all incoming data can be copied.
+ *
+ */
+static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
+				  struct pblk_w_ctx w_ctx,
+				  struct pblk_rb_entry *entry)
+{
+	memcpy(entry->data, data, rb->seg_size);
+
+	entry->w_ctx.lba = w_ctx.lba;
+	entry->w_ctx.ppa = w_ctx.ppa;
+}
+
+void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
+			      struct pblk_w_ctx w_ctx, unsigned int ring_pos)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct pblk_rb_entry *entry;
+	int flags;
+
+	entry = &rb->entries[ring_pos];
+	flags = READ_ONCE(entry->w_ctx.flags);
+#ifdef CONFIG_NVM_DEBUG
+	/* Caller must guarantee that the entry is free */
+	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
+#endif
+
+	__pblk_rb_write_entry(rb, data, w_ctx, entry);
+
+	pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
+	flags = w_ctx.flags | PBLK_WRITTEN_DATA;
+
+	/* Release flags on write context. Protect from writes */
+	smp_store_release(&entry->w_ctx.flags, flags);
+}
+
+void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
+			    struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
+			    unsigned int ring_pos)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct pblk_rb_entry *entry;
+	int flags;
+
+	entry = &rb->entries[ring_pos];
+	flags = READ_ONCE(entry->w_ctx.flags);
+#ifdef CONFIG_NVM_DEBUG
+	/* Caller must guarantee that the entry is free */
+	BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
+#endif
+
+	__pblk_rb_write_entry(rb, data, w_ctx, entry);
+
+	if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line))
+		entry->w_ctx.lba = ADDR_EMPTY;
+
+	flags = w_ctx.flags | PBLK_WRITTEN_DATA;
+
+	/* Release flags on write context. Protect from writes */
+	smp_store_release(&entry->w_ctx.flags, flags);
+}
+
+static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
+				  unsigned int pos)
+{
+	struct pblk_rb_entry *entry;
+	unsigned int subm, sync_point;
+	int flags;
+
+	subm = READ_ONCE(rb->subm);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_inc(&rb->inflight_sync_point);
+#endif
+
+	if (pos == subm)
+		return 0;
+
+	sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
+	entry = &rb->entries[sync_point];
+
+	flags = READ_ONCE(entry->w_ctx.flags);
+	flags |= PBLK_FLUSH_ENTRY;
+
+	/* Release flags on context. Protect from writes */
+	smp_store_release(&entry->w_ctx.flags, flags);
+
+	/* Protect syncs */
+	smp_store_release(&rb->sync_point, sync_point);
+
+	spin_lock_irq(&rb->s_lock);
+	bio_list_add(&entry->w_ctx.bios, bio);
+	spin_unlock_irq(&rb->s_lock);
+
+	return 1;
+}
+
+static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
+			       unsigned int *pos)
+{
+	unsigned int mem;
+	unsigned int sync;
+
+	sync = READ_ONCE(rb->sync);
+	mem = READ_ONCE(rb->mem);
+
+	if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries)
+		return 0;
+
+	if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
+		return 0;
+
+	*pos = mem;
+
+	return 1;
+}
+
+static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
+			     unsigned int *pos)
+{
+	if (!__pblk_rb_may_write(rb, nr_entries, pos))
+		return 0;
+
+	/* Protect from read count */
+	smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1));
+	return 1;
+}
+
+static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
+				   unsigned int *pos, struct bio *bio,
+				   int *io_ret)
+{
+	unsigned int mem;
+
+	if (!__pblk_rb_may_write(rb, nr_entries, pos))
+		return 0;
+
+	mem = (*pos + nr_entries) & (rb->nr_entries - 1);
+	*io_ret = NVM_IO_DONE;
+
+	if (bio->bi_opf & REQ_PREFLUSH) {
+		struct pblk *pblk = container_of(rb, struct pblk, rwb);
+
+#ifdef CONFIG_NVM_DEBUG
+		atomic_long_inc(&pblk->nr_flush);
+#endif
+		if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem))
+			*io_ret = NVM_IO_OK;
+	}
+
+	/* Protect from read count */
+	smp_store_release(&rb->mem, mem);
+	return 1;
+}
+
+/*
+ * Atomically check that (i) there is space on the write buffer for the
+ * incoming I/O, and (ii) the current I/O type has enough budget in the write
+ * buffer (rate-limiter).
+ */
+int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
+			   unsigned int nr_entries, unsigned int *pos)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	int flush_done;
+
+	spin_lock(&rb->w_lock);
+	if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+		spin_unlock(&rb->w_lock);
+		return NVM_IO_REQUEUE;
+	}
+
+	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+		spin_unlock(&rb->w_lock);
+		return NVM_IO_REQUEUE;
+	}
+
+	pblk_rl_user_in(&pblk->rl, nr_entries);
+	spin_unlock(&rb->w_lock);
+
+	return flush_done;
+}
+
+/*
+ * Look at pblk_rb_may_write_user comment
+ */
+int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
+			 unsigned int *pos)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+
+	spin_lock(&rb->w_lock);
+	if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
+		spin_unlock(&rb->w_lock);
+		return 0;
+	}
+
+	if (!pblk_rb_may_write(rb, nr_entries, pos)) {
+		spin_unlock(&rb->w_lock);
+		return 0;
+	}
+
+	pblk_rl_gc_in(&pblk->rl, nr_entries);
+	spin_unlock(&rb->w_lock);
+
+	return 1;
+}
+
+/*
+ * The caller of this function must ensure that the backpointer will not
+ * overwrite the entries passed on the list.
+ */
+unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
+				      struct list_head *list,
+				      unsigned int max)
+{
+	struct pblk_rb_entry *entry, *tentry;
+	struct page *page;
+	unsigned int read = 0;
+	int ret;
+
+	list_for_each_entry_safe(entry, tentry, list, index) {
+		if (read > max) {
+			pr_err("pblk: too many entries on list\n");
+			goto out;
+		}
+
+		page = virt_to_page(entry->data);
+		if (!page) {
+			pr_err("pblk: could not allocate write bio page\n");
+			goto out;
+		}
+
+		ret = bio_add_page(bio, page, rb->seg_size, 0);
+		if (ret != rb->seg_size) {
+			pr_err("pblk: could not add page to write bio\n");
+			goto out;
+		}
+
+		list_del(&entry->index);
+		read++;
+	}
+
+out:
+	return read;
+}
+
+/*
+ * Read available entries on rb and add them to the given bio. To avoid a memory
+ * copy, a page reference to the write buffer is used to be added to the bio.
+ *
+ * This function is used by the write thread to form the write bio that will
+ * persist data on the write buffer to the media.
+ */
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
+				 struct pblk_c_ctx *c_ctx,
+				 unsigned int pos,
+				 unsigned int nr_entries,
+				 unsigned int count)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct pblk_rb_entry *entry;
+	struct page *page;
+	unsigned int pad = 0, read = 0, to_read = nr_entries;
+	unsigned int user_io = 0, gc_io = 0;
+	unsigned int i;
+	int flags;
+	int ret;
+
+	if (count < nr_entries) {
+		pad = nr_entries - count;
+		to_read = count;
+	}
+
+	c_ctx->sentry = pos;
+	c_ctx->nr_valid = to_read;
+	c_ctx->nr_padded = pad;
+
+	for (i = 0; i < to_read; i++) {
+		entry = &rb->entries[pos];
+
+		/* A write has been allowed into the buffer, but data is still
+		 * being copied to it. It is ok to busy wait.
+		 */
+try:
+		flags = READ_ONCE(entry->w_ctx.flags);
+		if (!(flags & PBLK_WRITTEN_DATA))
+			goto try;
+
+		if (flags & PBLK_IOTYPE_USER)
+			user_io++;
+		else if (flags & PBLK_IOTYPE_GC)
+			gc_io++;
+		else
+			WARN(1, "pblk: unknown IO type\n");
+
+		page = virt_to_page(entry->data);
+		if (!page) {
+			pr_err("pblk: could not allocate write bio page\n");
+			flags &= ~PBLK_WRITTEN_DATA;
+			flags |= PBLK_SUBMITTED_ENTRY;
+			/* Release flags on context. Protect from writes */
+			smp_store_release(&entry->w_ctx.flags, flags);
+			goto out;
+		}
+
+		ret = bio_add_page(bio, page, rb->seg_size, 0);
+		if (ret != rb->seg_size) {
+			pr_err("pblk: could not add page to write bio\n");
+			flags &= ~PBLK_WRITTEN_DATA;
+			flags |= PBLK_SUBMITTED_ENTRY;
+			/* Release flags on context. Protect from writes */
+			smp_store_release(&entry->w_ctx.flags, flags);
+			goto out;
+		}
+
+		if (flags & PBLK_FLUSH_ENTRY) {
+			unsigned int sync_point;
+
+			sync_point = READ_ONCE(rb->sync_point);
+			if (sync_point == pos) {
+				/* Protect syncs */
+				smp_store_release(&rb->sync_point, EMPTY_ENTRY);
+			}
+
+			flags &= ~PBLK_FLUSH_ENTRY;
+#ifdef CONFIG_NVM_DEBUG
+			atomic_dec(&rb->inflight_sync_point);
+#endif
+		}
+
+		flags &= ~PBLK_WRITTEN_DATA;
+		flags |= PBLK_SUBMITTED_ENTRY;
+
+		/* Release flags on context. Protect from writes */
+		smp_store_release(&entry->w_ctx.flags, flags);
+
+		pos = (pos + 1) & (rb->nr_entries - 1);
+	}
+
+	read = to_read;
+	pblk_rl_out(&pblk->rl, user_io, gc_io);
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(pad, &((struct pblk *)
+			(container_of(rb, struct pblk, rwb)))->padded_writes);
+#endif
+out:
+	return read;
+}
+
+/*
+ * Copy to bio only if the lba matches the one on the given cache entry.
+ * Otherwise, it means that the entry has been overwritten, and the bio should
+ * be directed to disk.
+ */
+int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
+			u64 pos, int bio_iter)
+{
+	struct pblk_rb_entry *entry;
+	struct pblk_w_ctx *w_ctx;
+	void *data;
+	int flags;
+	int ret = 1;
+
+	spin_lock(&rb->w_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Caller must ensure that the access will not cause an overflow */
+	BUG_ON(pos >= rb->nr_entries);
+#endif
+	entry = &rb->entries[pos];
+	w_ctx = &entry->w_ctx;
+	flags = READ_ONCE(w_ctx->flags);
+
+	/* Check if the entry has been overwritten or is scheduled to be */
+	if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+		ret = 0;
+		goto out;
+	}
+
+	/* Only advance the bio if it hasn't been advanced already. If advanced,
+	 * this bio is at least a partial bio (i.e., it has partially been
+	 * filled with data from the cache). If part of the data resides on the
+	 * media, we will read later on
+	 */
+	if (unlikely(!bio->bi_iter.bi_idx))
+		bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
+
+	data = bio_data(bio);
+	memcpy(data, entry->data, rb->seg_size);
+
+out:
+	spin_unlock(&rb->w_lock);
+	return ret;
+}
+
+struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
+{
+	unsigned int entry = pos & (rb->nr_entries - 1);
+
+	return &rb->entries[entry].w_ctx;
+}
+
+unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
+	__acquires(&rb->s_lock)
+{
+	if (flags)
+		spin_lock_irqsave(&rb->s_lock, *flags);
+	else
+		spin_lock_irq(&rb->s_lock);
+
+	return rb->sync;
+}
+
+void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
+	__releases(&rb->s_lock)
+{
+	lockdep_assert_held(&rb->s_lock);
+
+	if (flags)
+		spin_unlock_irqrestore(&rb->s_lock, *flags);
+	else
+		spin_unlock_irq(&rb->s_lock);
+}
+
+unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
+{
+	unsigned int sync;
+	unsigned int i;
+
+	lockdep_assert_held(&rb->s_lock);
+
+	sync = READ_ONCE(rb->sync);
+
+	for (i = 0; i < nr_entries; i++)
+		sync = (sync + 1) & (rb->nr_entries - 1);
+
+	/* Protect from counts */
+	smp_store_release(&rb->sync, sync);
+
+	return sync;
+}
+
+unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb)
+{
+	unsigned int subm, sync_point;
+	unsigned int count;
+
+	/* Protect syncs */
+	sync_point = smp_load_acquire(&rb->sync_point);
+	if (sync_point == EMPTY_ENTRY)
+		return 0;
+
+	subm = READ_ONCE(rb->subm);
+
+	/* The sync point itself counts as a sector to sync */
+	count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1;
+
+	return count;
+}
+
+/*
+ * Scan from the current position of the sync pointer to find the entry that
+ * corresponds to the given ppa. This is necessary since write requests can be
+ * completed out of order. The assumption is that the ppa is close to the sync
+ * pointer thus the search will not take long.
+ *
+ * The caller of this function must guarantee that the sync pointer will no
+ * reach the entry while it is using the metadata associated with it. With this
+ * assumption in mind, there is no need to take the sync lock.
+ */
+struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
+					      struct ppa_addr *ppa)
+{
+	unsigned int sync, subm, count;
+	unsigned int i;
+
+	sync = READ_ONCE(rb->sync);
+	subm = READ_ONCE(rb->subm);
+	count = pblk_rb_ring_count(subm, sync, rb->nr_entries);
+
+	for (i = 0; i < count; i++)
+		sync = (sync + 1) & (rb->nr_entries - 1);
+
+	return NULL;
+}
+
+int pblk_rb_tear_down_check(struct pblk_rb *rb)
+{
+	struct pblk_rb_entry *entry;
+	int i;
+	int ret = 0;
+
+	spin_lock(&rb->w_lock);
+	spin_lock_irq(&rb->s_lock);
+
+	if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
+				(rb->sync == rb->l2p_update) &&
+				(rb->sync_point == EMPTY_ENTRY)) {
+		goto out;
+	}
+
+	if (!rb->entries) {
+		ret = 1;
+		goto out;
+	}
+
+	for (i = 0; i < rb->nr_entries; i++) {
+		entry = &rb->entries[i];
+
+		if (!entry->data) {
+			ret = 1;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock(&rb->w_lock);
+	spin_unlock_irq(&rb->s_lock);
+
+	return ret;
+}
+
+unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
+{
+	return (pos & (rb->nr_entries - 1));
+}
+
+int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
+{
+	return (pos >= rb->nr_entries);
+}
+
+ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct pblk_c_ctx *c;
+	ssize_t offset;
+	int queued_entries = 0;
+
+	spin_lock_irq(&rb->s_lock);
+	list_for_each_entry(c, &pblk->compl_list, list)
+		queued_entries++;
+	spin_unlock_irq(&rb->s_lock);
+
+	if (rb->sync_point != EMPTY_ENTRY)
+		offset = scnprintf(buf, PAGE_SIZE,
+			"%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
+			rb->nr_entries,
+			rb->mem,
+			rb->subm,
+			rb->sync,
+			rb->l2p_update,
+#ifdef CONFIG_NVM_DEBUG
+			atomic_read(&rb->inflight_sync_point),
+#else
+			0,
+#endif
+			rb->sync_point,
+			pblk_rb_read_count(rb),
+			pblk_rb_space(rb),
+			pblk_rb_sync_point_count(rb),
+			queued_entries);
+	else
+		offset = scnprintf(buf, PAGE_SIZE,
+			"%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
+			rb->nr_entries,
+			rb->mem,
+			rb->subm,
+			rb->sync,
+			rb->l2p_update,
+#ifdef CONFIG_NVM_DEBUG
+			atomic_read(&rb->inflight_sync_point),
+#else
+			0,
+#endif
+			pblk_rb_read_count(rb),
+			pblk_rb_space(rb),
+			pblk_rb_sync_point_count(rb),
+			queued_entries);
+
+	return offset;
+}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
new file mode 100644
index 000000000000..4a12f14d78c6
--- /dev/null
+++ b/drivers/lightnvm/pblk-read.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-read.c - pblk's read path
+ */
+
+#include "pblk.h"
+
+/*
+ * There is no guarantee that the value read from cache has not been updated and
+ * resides at another location in the cache. We guarantee though that if the
+ * value is read from the cache, it belongs to the mapped lba. In order to
+ * guarantee and order between writes and reads are ordered, a flush must be
+ * issued.
+ */
+static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
+				sector_t lba, struct ppa_addr ppa,
+				int bio_iter)
+{
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a cache address */
+	BUG_ON(pblk_ppa_empty(ppa));
+	BUG_ON(!pblk_addr_in_cache(ppa));
+#endif
+
+	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
+					pblk_addr_to_cacheline(ppa), bio_iter);
+}
+
+static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+				 unsigned long *read_bitmap)
+{
+	struct bio *bio = rqd->bio;
+	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
+	sector_t blba = pblk_get_lba(bio);
+	int nr_secs = rqd->nr_ppas;
+	int advanced_bio = 0;
+	int i, j = 0;
+
+	/* logic error: lba out-of-bounds. Ignore read request */
+	if (blba + nr_secs >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lbas out of bounds\n");
+		return;
+	}
+
+	pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+
+	for (i = 0; i < nr_secs; i++) {
+		struct ppa_addr p = ppas[i];
+		sector_t lba = blba + i;
+
+retry:
+		if (pblk_ppa_empty(p)) {
+			WARN_ON(test_and_set_bit(i, read_bitmap));
+			continue;
+		}
+
+		/* Try to read from write buffer. The address is later checked
+		 * on the write buffer to prevent retrieving overwritten data.
+		 */
+		if (pblk_addr_in_cache(p)) {
+			if (!pblk_read_from_cache(pblk, bio, lba, p, i)) {
+				pblk_lookup_l2p_seq(pblk, &p, lba, 1);
+				goto retry;
+			}
+			WARN_ON(test_and_set_bit(i, read_bitmap));
+			advanced_bio = 1;
+		} else {
+			/* Read from media non-cached sectors */
+			rqd->ppa_list[j++] = p;
+		}
+
+		if (advanced_bio)
+			bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(nr_secs, &pblk->inflight_reads);
+#endif
+}
+
+static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	int err;
+
+	rqd->flags = pblk_set_read_mode(pblk);
+
+	err = pblk_submit_io(pblk, rqd);
+	if (err)
+		return NVM_IO_ERR;
+
+	return NVM_IO_OK;
+}
+
+static void pblk_end_io_read(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+	struct bio *bio = rqd->bio;
+
+	if (rqd->error)
+		pblk_log_read_err(pblk, rqd);
+#ifdef CONFIG_NVM_DEBUG
+	else
+		WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+#endif
+
+	if (rqd->nr_ppas > 1)
+		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+
+	bio_put(bio);
+	if (r_ctx->orig_bio) {
+#ifdef CONFIG_NVM_DEBUG
+		WARN_ONCE(r_ctx->orig_bio->bi_error,
+						"pblk: corrupted read bio\n");
+#endif
+		bio_endio(r_ctx->orig_bio);
+		bio_put(r_ctx->orig_bio);
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
+	atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
+#endif
+
+	pblk_free_rqd(pblk, rqd, READ);
+}
+
+static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
+				      unsigned int bio_init_idx,
+				      unsigned long *read_bitmap)
+{
+	struct bio *new_bio, *bio = rqd->bio;
+	struct bio_vec src_bv, dst_bv;
+	void *ppa_ptr = NULL;
+	void *src_p, *dst_p;
+	dma_addr_t dma_ppa_list = 0;
+	int nr_secs = rqd->nr_ppas;
+	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
+	int i, ret, hole;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
+	if (!new_bio) {
+		pr_err("pblk: could not alloc read bio\n");
+		return NVM_IO_ERR;
+	}
+
+	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
+		goto err;
+
+	if (nr_holes != new_bio->bi_vcnt) {
+		pr_err("pblk: malformed bio\n");
+		goto err;
+	}
+
+	new_bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
+	new_bio->bi_private = &wait;
+	new_bio->bi_end_io = pblk_end_bio_sync;
+
+	rqd->bio = new_bio;
+	rqd->nr_ppas = nr_holes;
+	rqd->end_io = NULL;
+
+	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+		ppa_ptr = rqd->ppa_list;
+		dma_ppa_list = rqd->dma_ppa_list;
+		rqd->ppa_addr = rqd->ppa_list[0];
+	}
+
+	ret = pblk_submit_read_io(pblk, rqd);
+	if (ret) {
+		bio_put(rqd->bio);
+		pr_err("pblk: read IO submission failed\n");
+		goto err;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: partial read I/O timed out\n");
+	}
+
+	if (rqd->error) {
+		atomic_long_inc(&pblk->read_failed);
+#ifdef CONFIG_NVM_DEBUG
+		pblk_print_failed_rqd(pblk, rqd, rqd->error);
+#endif
+	}
+
+	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+		rqd->ppa_list = ppa_ptr;
+		rqd->dma_ppa_list = dma_ppa_list;
+	}
+
+	/* Fill the holes in the original bio */
+	i = 0;
+	hole = find_first_zero_bit(read_bitmap, nr_secs);
+	do {
+		src_bv = new_bio->bi_io_vec[i++];
+		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+
+		src_p = kmap_atomic(src_bv.bv_page);
+		dst_p = kmap_atomic(dst_bv.bv_page);
+
+		memcpy(dst_p + dst_bv.bv_offset,
+			src_p + src_bv.bv_offset,
+			PBLK_EXPOSED_PAGE_SIZE);
+
+		kunmap_atomic(src_p);
+		kunmap_atomic(dst_p);
+
+		mempool_free(src_bv.bv_page, pblk->page_pool);
+
+		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
+	} while (hole < nr_secs);
+
+	bio_put(new_bio);
+
+	/* Complete the original bio and associated request */
+	rqd->bio = bio;
+	rqd->nr_ppas = nr_secs;
+	rqd->private = pblk;
+
+	bio_endio(bio);
+	pblk_end_io_read(rqd);
+	return NVM_IO_OK;
+
+err:
+	/* Free allocated pages in new bio */
+	pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
+	rqd->private = pblk;
+	pblk_end_io_read(rqd);
+	return NVM_IO_ERR;
+}
+
+static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			 unsigned long *read_bitmap)
+{
+	struct bio *bio = rqd->bio;
+	struct ppa_addr ppa;
+	sector_t lba = pblk_get_lba(bio);
+
+	/* logic error: lba out-of-bounds. Ignore read request */
+	if (lba >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lba out of bounds\n");
+		return;
+	}
+
+	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_inc(&pblk->inflight_reads);
+#endif
+
+retry:
+	if (pblk_ppa_empty(ppa)) {
+		WARN_ON(test_and_set_bit(0, read_bitmap));
+		return;
+	}
+
+	/* Try to read from write buffer. The address is later checked on the
+	 * write buffer to prevent retrieving overwritten data.
+	 */
+	if (pblk_addr_in_cache(ppa)) {
+		if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) {
+			pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+			goto retry;
+		}
+		WARN_ON(test_and_set_bit(0, read_bitmap));
+	} else {
+		rqd->ppa_addr = ppa;
+	}
+}
+
+int pblk_submit_read(struct pblk *pblk, struct bio *bio)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	unsigned int nr_secs = pblk_get_secs(bio);
+	struct nvm_rq *rqd;
+	unsigned long read_bitmap; /* Max 64 ppas per request */
+	unsigned int bio_init_idx;
+	int ret = NVM_IO_ERR;
+
+	if (nr_secs > PBLK_MAX_REQ_ADDRS)
+		return NVM_IO_ERR;
+
+	bitmap_zero(&read_bitmap, nr_secs);
+
+	rqd = pblk_alloc_rqd(pblk, READ);
+	if (IS_ERR(rqd)) {
+		pr_err_ratelimited("pblk: not able to alloc rqd");
+		return NVM_IO_ERR;
+	}
+
+	rqd->opcode = NVM_OP_PREAD;
+	rqd->bio = bio;
+	rqd->nr_ppas = nr_secs;
+	rqd->private = pblk;
+	rqd->end_io = pblk_end_io_read;
+
+	/* Save the index for this bio's start. This is needed in case
+	 * we need to fill a partial read.
+	 */
+	bio_init_idx = pblk_get_bi_idx(bio);
+
+	if (nr_secs > 1) {
+		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+						&rqd->dma_ppa_list);
+		if (!rqd->ppa_list) {
+			pr_err("pblk: not able to allocate ppa list\n");
+			goto fail_rqd_free;
+		}
+
+		pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
+	} else {
+		pblk_read_rq(pblk, rqd, &read_bitmap);
+	}
+
+	bio_get(bio);
+	if (bitmap_full(&read_bitmap, nr_secs)) {
+		bio_endio(bio);
+		pblk_end_io_read(rqd);
+		return NVM_IO_OK;
+	}
+
+	/* All sectors are to be read from the device */
+	if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
+		struct bio *int_bio = NULL;
+		struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+
+		/* Clone read bio to deal with read errors internally */
+		int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+		if (!int_bio) {
+			pr_err("pblk: could not clone read bio\n");
+			return NVM_IO_ERR;
+		}
+
+		rqd->bio = int_bio;
+		r_ctx->orig_bio = bio;
+
+		ret = pblk_submit_read_io(pblk, rqd);
+		if (ret) {
+			pr_err("pblk: read IO submission failed\n");
+			if (int_bio)
+				bio_put(int_bio);
+			return ret;
+		}
+
+		return NVM_IO_OK;
+	}
+
+	/* The read bio request could be partially filled by the write buffer,
+	 * but there are some holes that need to be read from the drive.
+	 */
+	ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
+	if (ret) {
+		pr_err("pblk: failed to perform partial read\n");
+		return ret;
+	}
+
+	return NVM_IO_OK;
+
+fail_rqd_free:
+	pblk_free_rqd(pblk, rqd, READ);
+	return ret;
+}
+
+static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
+			      struct pblk_line *line, u64 *lba_list,
+			      unsigned int nr_secs)
+{
+	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
+	int valid_secs = 0;
+	int i;
+
+	pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs);
+
+	for (i = 0; i < nr_secs; i++) {
+		if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id ||
+						pblk_ppa_empty(ppas[i])) {
+			lba_list[i] = ADDR_EMPTY;
+			continue;
+		}
+
+		rqd->ppa_list[valid_secs++] = ppas[i];
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(valid_secs, &pblk->inflight_reads);
+#endif
+	return valid_secs;
+}
+
+static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
+		      struct pblk_line *line, sector_t lba)
+{
+	struct ppa_addr ppa;
+	int valid_secs = 0;
+
+	if (lba == ADDR_EMPTY)
+		goto out;
+
+	/* logic error: lba out-of-bounds */
+	if (lba >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lba out of bounds\n");
+		goto out;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	ppa = pblk_trans_map_get(pblk, lba);
+	spin_unlock(&pblk->trans_lock);
+
+	/* Ignore updated values until the moment */
+	if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id ||
+							pblk_ppa_empty(ppa))
+		goto out;
+
+	rqd->ppa_addr = ppa;
+	valid_secs = 1;
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_inc(&pblk->inflight_reads);
+#endif
+
+out:
+	return valid_secs;
+}
+
+int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
+			unsigned int nr_secs, unsigned int *secs_to_gc,
+			struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct request_queue *q = dev->q;
+	struct bio *bio;
+	struct nvm_rq rqd;
+	int ret, data_len;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	if (nr_secs > 1) {
+		rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_ppa_list);
+		if (!rqd.ppa_list)
+			return NVM_IO_ERR;
+
+		*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
+								nr_secs);
+		if (*secs_to_gc == 1) {
+			struct ppa_addr ppa;
+
+			ppa = rqd.ppa_list[0];
+			nvm_dev_dma_free(dev->parent, rqd.ppa_list,
+							rqd.dma_ppa_list);
+			rqd.ppa_addr = ppa;
+		}
+	} else {
+		*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
+	}
+
+	if (!(*secs_to_gc))
+		goto out;
+
+	data_len = (*secs_to_gc) * geo->sec_size;
+	bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
+		goto err_free_dma;
+	}
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+	rqd.opcode = NVM_OP_PREAD;
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+	rqd.nr_ppas = *secs_to_gc;
+	rqd.bio = bio;
+
+	ret = pblk_submit_read_io(pblk, &rqd);
+	if (ret) {
+		bio_endio(bio);
+		pr_err("pblk: GC read request failed\n");
+		goto err_free_dma;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: GC read I/O timed out\n");
+	}
+
+	if (rqd.error) {
+		atomic_long_inc(&pblk->read_failed_gc);
+#ifdef CONFIG_NVM_DEBUG
+		pblk_print_failed_rqd(pblk, &rqd, rqd.error);
+#endif
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(*secs_to_gc, &pblk->sync_reads);
+	atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads);
+	atomic_long_sub(*secs_to_gc, &pblk->inflight_reads);
+#endif
+
+out:
+	if (rqd.nr_ppas > 1)
+		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	return NVM_IO_OK;
+
+err_free_dma:
+	if (rqd.nr_ppas > 1)
+		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	return NVM_IO_ERR;
+}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
new file mode 100644
index 000000000000..f8f85087cd3c
--- /dev/null
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -0,0 +1,998 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-recovery.c - pblk's recovery path
+ */
+
+#include "pblk.h"
+
+void pblk_submit_rec(struct work_struct *work)
+{
+	struct pblk_rec_ctx *recovery =
+			container_of(work, struct pblk_rec_ctx, ws_rec);
+	struct pblk *pblk = recovery->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_rq *rqd = recovery->rqd;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	int max_secs = nvm_max_phys_sects(dev);
+	struct bio *bio;
+	unsigned int nr_rec_secs;
+	unsigned int pgs_read;
+	int ret;
+
+	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
+								max_secs);
+
+	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
+	if (!bio) {
+		pr_err("pblk: not able to create recovery bio\n");
+		return;
+	}
+
+	bio->bi_iter.bi_sector = 0;
+	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+	rqd->bio = bio;
+	rqd->nr_ppas = nr_rec_secs;
+
+	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
+								nr_rec_secs);
+	if (pgs_read != nr_rec_secs) {
+		pr_err("pblk: could not read recovery entries\n");
+		goto err;
+	}
+
+	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
+		pr_err("pblk: could not setup recovery request\n");
+		goto err;
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
+#endif
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submission failed: %d\n", ret);
+		goto err;
+	}
+
+	mempool_free(recovery, pblk->rec_pool);
+	return;
+
+err:
+	bio_put(bio);
+	pblk_free_rqd(pblk, rqd, WRITE);
+}
+
+int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
+			struct pblk_rec_ctx *recovery, u64 *comp_bits,
+			unsigned int comp)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	int max_secs = nvm_max_phys_sects(dev);
+	struct nvm_rq *rec_rqd;
+	struct pblk_c_ctx *rec_ctx;
+	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
+
+	rec_rqd = pblk_alloc_rqd(pblk, WRITE);
+	if (IS_ERR(rec_rqd)) {
+		pr_err("pblk: could not create recovery req.\n");
+		return -ENOMEM;
+	}
+
+	rec_ctx = nvm_rq_to_pdu(rec_rqd);
+
+	/* Copy completion bitmap, but exclude the first X completed entries */
+	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
+				(unsigned long int *)comp_bits,
+				comp, max_secs);
+
+	/* Save the context for the entries that need to be re-written and
+	 * update current context with the completed entries.
+	 */
+	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
+	if (comp >= c_ctx->nr_valid) {
+		rec_ctx->nr_valid = 0;
+		rec_ctx->nr_padded = nr_entries - comp;
+
+		c_ctx->nr_padded = comp - c_ctx->nr_valid;
+	} else {
+		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
+		rec_ctx->nr_padded = c_ctx->nr_padded;
+
+		c_ctx->nr_valid = comp;
+		c_ctx->nr_padded = 0;
+	}
+
+	recovery->rqd = rec_rqd;
+	recovery->pblk = pblk;
+
+	return 0;
+}
+
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+{
+	u32 crc;
+
+	crc = pblk_calc_emeta_crc(pblk, emeta);
+	if (le32_to_cpu(emeta->crc) != crc)
+		return NULL;
+
+	if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+		return NULL;
+
+	return pblk_line_emeta_to_lbas(emeta);
+}
+
+static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct line_emeta *emeta = line->emeta;
+	__le64 *lba_list;
+	int data_start;
+	int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
+	int i;
+
+	lba_list = pblk_recov_get_lba_list(pblk, emeta);
+	if (!lba_list)
+		return 1;
+
+	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
+	nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
+	nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+
+	for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
+		struct ppa_addr ppa;
+		int pos;
+
+		ppa = addr_to_pblk_ppa(pblk, i, line->id);
+		pos = pblk_ppa_to_pos(geo, ppa);
+
+		/* Do not update bad blocks */
+		if (test_bit(pos, line->blk_bitmap))
+			continue;
+
+		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
+			spin_lock(&line->lock);
+			if (test_and_set_bit(i, line->invalid_bitmap))
+				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
+			else
+				line->vsc--;
+			spin_unlock(&line->lock);
+
+			continue;
+		}
+
+		pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
+		nr_lbas++;
+	}
+
+	if (nr_valid_lbas != nr_lbas)
+		pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
+				line->id, line->emeta->nr_valid_lbas, nr_lbas);
+
+	line->left_msecs = 0;
+
+	return 0;
+}
+
+static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+
+	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+				nr_bb * geo->sec_per_blk;
+}
+
+struct pblk_recov_alloc {
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct nvm_rq *rqd;
+	void *data;
+	dma_addr_t dma_ppa_list;
+	dma_addr_t dma_meta_list;
+};
+
+static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
+			       struct pblk_recov_alloc p, u64 r_ptr)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct nvm_rq *rqd;
+	struct bio *bio;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+	u64 r_ptr_int;
+	int left_ppas;
+	int rq_ppas, rq_len;
+	int i, j;
+	int ret = 0;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	ppa_list = p.ppa_list;
+	meta_list = p.meta_list;
+	rqd = p.rqd;
+	data = p.data;
+	dma_ppa_list = p.dma_ppa_list;
+	dma_meta_list = p.dma_meta_list;
+
+	left_ppas = line->cur_sec - r_ptr;
+	if (!left_ppas)
+		return 0;
+
+	r_ptr_int = r_ptr;
+
+next_read_rq:
+	memset(rqd, 0, pblk_r_rq_size);
+
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	if (!rq_ppas)
+		rq_ppas = pblk->min_write_pgs;
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+	rqd->bio = bio;
+	rqd->opcode = NVM_OP_PREAD;
+	rqd->flags = pblk_set_read_mode(pblk);
+	rqd->meta_list = meta_list;
+	rqd->nr_ppas = rq_ppas;
+	rqd->ppa_list = ppa_list;
+	rqd->dma_ppa_list = dma_ppa_list;
+	rqd->dma_meta_list = dma_meta_list;
+	rqd->end_io = pblk_end_io_sync;
+	rqd->private = &wait;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		struct ppa_addr ppa;
+		int pos;
+
+		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+		pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+		while (test_bit(pos, line->blk_bitmap)) {
+			r_ptr_int += pblk->min_write_pgs;
+			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+			pos = pblk_dev_ppa_to_pos(geo, ppa);
+		}
+
+		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
+			rqd->ppa_list[i] =
+				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
+	}
+
+	/* If read fails, more padding is needed */
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submission failed: %d\n", ret);
+		return ret;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: L2P recovery read timed out\n");
+		return -EINTR;
+	}
+
+	reinit_completion(&wait);
+
+	/* At this point, the read should not fail. If it does, it is a problem
+	 * we cannot recover from here. Need FTL log.
+	 */
+	if (rqd->error) {
+		pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
+		return -EINTR;
+	}
+
+	for (i = 0; i < rqd->nr_ppas; i++) {
+		u64 lba = le64_to_cpu(meta_list[i].lba);
+
+		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+			continue;
+
+		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+	}
+
+	left_ppas -= rq_ppas;
+	if (left_ppas > 0)
+		goto next_read_rq;
+
+	return 0;
+}
+
+static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
+			      struct pblk_recov_alloc p, int left_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct nvm_rq *rqd;
+	struct bio *bio;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+	__le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+	u64 w_ptr = line->cur_sec;
+	int left_line_ppas = line->left_msecs;
+	int rq_ppas, rq_len;
+	int i, j;
+	int ret = 0;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	ppa_list = p.ppa_list;
+	meta_list = p.meta_list;
+	rqd = p.rqd;
+	data = p.data;
+	dma_ppa_list = p.dma_ppa_list;
+	dma_meta_list = p.dma_meta_list;
+
+next_pad_rq:
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	if (!rq_ppas)
+		rq_ppas = pblk->min_write_pgs;
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+	memset(rqd, 0, pblk_r_rq_size);
+
+	rqd->bio = bio;
+	rqd->opcode = NVM_OP_PWRITE;
+	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+	rqd->meta_list = meta_list;
+	rqd->nr_ppas = rq_ppas;
+	rqd->ppa_list = ppa_list;
+	rqd->dma_ppa_list = dma_ppa_list;
+	rqd->dma_meta_list = dma_meta_list;
+	rqd->end_io = pblk_end_io_sync;
+	rqd->private = &wait;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		struct ppa_addr ppa;
+		int pos;
+
+		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+		ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
+		pos = pblk_ppa_to_pos(geo, ppa);
+
+		while (test_bit(pos, line->blk_bitmap)) {
+			w_ptr += pblk->min_write_pgs;
+			ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
+			pos = pblk_ppa_to_pos(geo, ppa);
+		}
+
+		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
+			struct ppa_addr dev_ppa;
+
+			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+
+			pblk_map_invalidate(pblk, dev_ppa);
+			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+			lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+			rqd->ppa_list[i] = dev_ppa;
+		}
+	}
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submission failed: %d\n", ret);
+		return ret;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: L2P recovery write timed out\n");
+	}
+	reinit_completion(&wait);
+
+	left_line_ppas -= rq_ppas;
+	left_ppas -= rq_ppas;
+	if (left_ppas > 0 && left_line_ppas)
+		goto next_pad_rq;
+
+	return 0;
+}
+
+/* When this function is called, it means that not all upper pages have been
+ * written in a page that contains valid data. In order to recover this data, we
+ * first find the write pointer on the device, then we pad all necessary
+ * sectors, and finally attempt to read the valid data
+ */
+static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
+				   struct pblk_recov_alloc p)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct nvm_rq *rqd;
+	struct bio *bio;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+	u64 w_ptr = 0, r_ptr;
+	int rq_ppas, rq_len;
+	int i, j;
+	int ret = 0;
+	int rec_round;
+	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	ppa_list = p.ppa_list;
+	meta_list = p.meta_list;
+	rqd = p.rqd;
+	data = p.data;
+	dma_ppa_list = p.dma_ppa_list;
+	dma_meta_list = p.dma_meta_list;
+
+	/* we could recover up until the line write pointer */
+	r_ptr = line->cur_sec;
+	rec_round = 0;
+
+next_rq:
+	memset(rqd, 0, pblk_r_rq_size);
+
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	if (!rq_ppas)
+		rq_ppas = pblk->min_write_pgs;
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+	rqd->bio = bio;
+	rqd->opcode = NVM_OP_PREAD;
+	rqd->flags = pblk_set_read_mode(pblk);
+	rqd->meta_list = meta_list;
+	rqd->nr_ppas = rq_ppas;
+	rqd->ppa_list = ppa_list;
+	rqd->dma_ppa_list = dma_ppa_list;
+	rqd->dma_meta_list = dma_meta_list;
+	rqd->end_io = pblk_end_io_sync;
+	rqd->private = &wait;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		struct ppa_addr ppa;
+		int pos;
+
+		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+		pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+		while (test_bit(pos, line->blk_bitmap)) {
+			w_ptr += pblk->min_write_pgs;
+			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+			pos = pblk_dev_ppa_to_pos(geo, ppa);
+		}
+
+		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
+			rqd->ppa_list[i] =
+				addr_to_gen_ppa(pblk, w_ptr, line->id);
+	}
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submission failed: %d\n", ret);
+		return ret;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: L2P recovery read timed out\n");
+	}
+	reinit_completion(&wait);
+
+	/* This should not happen since the read failed during normal recovery,
+	 * but the media works funny sometimes...
+	 */
+	if (!rec_round++ && !rqd->error) {
+		rec_round = 0;
+		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
+			u64 lba = le64_to_cpu(meta_list[i].lba);
+
+			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+				continue;
+
+			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+		}
+	}
+
+	/* Reached the end of the written line */
+	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
+		int pad_secs, nr_error_bits, bit;
+		int ret;
+
+		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
+		nr_error_bits = rqd->nr_ppas - bit;
+
+		/* Roll back failed sectors */
+		line->cur_sec -= nr_error_bits;
+		line->left_msecs += nr_error_bits;
+		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+
+		pad_secs = pblk_pad_distance(pblk);
+		if (pad_secs > line->left_msecs)
+			pad_secs = line->left_msecs;
+
+		ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+		if (ret)
+			pr_err("pblk: OOB padding failed (err:%d)\n", ret);
+
+		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
+		if (ret)
+			pr_err("pblk: OOB read failed (err:%d)\n", ret);
+
+		line->left_ssecs = line->left_msecs;
+		left_ppas = 0;
+	}
+
+	left_ppas -= rq_ppas;
+	if (left_ppas > 0)
+		goto next_rq;
+
+	return ret;
+}
+
+static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
+			       struct pblk_recov_alloc p, int *done)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct nvm_rq *rqd;
+	struct bio *bio;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+	u64 paddr;
+	int rq_ppas, rq_len;
+	int i, j;
+	int ret = 0;
+	int left_ppas = pblk_calc_sec_in_line(pblk, line);
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	ppa_list = p.ppa_list;
+	meta_list = p.meta_list;
+	rqd = p.rqd;
+	data = p.data;
+	dma_ppa_list = p.dma_ppa_list;
+	dma_meta_list = p.dma_meta_list;
+
+	*done = 1;
+
+next_rq:
+	memset(rqd, 0, pblk_r_rq_size);
+
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	if (!rq_ppas)
+		rq_ppas = pblk->min_write_pgs;
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+	rqd->bio = bio;
+	rqd->opcode = NVM_OP_PREAD;
+	rqd->flags = pblk_set_read_mode(pblk);
+	rqd->meta_list = meta_list;
+	rqd->nr_ppas = rq_ppas;
+	rqd->ppa_list = ppa_list;
+	rqd->dma_ppa_list = dma_ppa_list;
+	rqd->dma_meta_list = dma_meta_list;
+	rqd->end_io = pblk_end_io_sync;
+	rqd->private = &wait;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		struct ppa_addr ppa;
+		int pos;
+
+		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+		pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+		while (test_bit(pos, line->blk_bitmap)) {
+			paddr += pblk->min_write_pgs;
+			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
+			pos = pblk_dev_ppa_to_pos(geo, ppa);
+		}
+
+		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
+			rqd->ppa_list[i] =
+				addr_to_gen_ppa(pblk, paddr, line->id);
+	}
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submission failed: %d\n", ret);
+		bio_put(bio);
+		return ret;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: L2P recovery read timed out\n");
+	}
+	reinit_completion(&wait);
+
+	/* Reached the end of the written line */
+	if (rqd->error) {
+		int nr_error_bits, bit;
+
+		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
+		nr_error_bits = rqd->nr_ppas - bit;
+
+		/* Roll back failed sectors */
+		line->cur_sec -= nr_error_bits;
+		line->left_msecs += nr_error_bits;
+		line->left_ssecs = line->left_msecs;
+		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+
+		left_ppas = 0;
+		rqd->nr_ppas = bit;
+
+		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
+			*done = 0;
+	}
+
+	for (i = 0; i < rqd->nr_ppas; i++) {
+		u64 lba = le64_to_cpu(meta_list[i].lba);
+
+		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+			continue;
+
+		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+	}
+
+	left_ppas -= rq_ppas;
+	if (left_ppas > 0)
+		goto next_rq;
+
+	return ret;
+}
+
+/* Scan line for lbas on out of bound area */
+static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct nvm_rq *rqd;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	struct pblk_recov_alloc p;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+	int done, ret = 0;
+
+	rqd = pblk_alloc_rqd(pblk, READ);
+	if (IS_ERR(rqd))
+		return PTR_ERR(rqd);
+
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+	if (!meta_list) {
+		ret = -ENOMEM;
+		goto free_rqd;
+	}
+
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+	data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
+	if (!data) {
+		ret = -ENOMEM;
+		goto free_meta_list;
+	}
+
+	p.ppa_list = ppa_list;
+	p.meta_list = meta_list;
+	p.rqd = rqd;
+	p.data = data;
+	p.dma_ppa_list = dma_ppa_list;
+	p.dma_meta_list = dma_meta_list;
+
+	ret = pblk_recov_scan_oob(pblk, line, p, &done);
+	if (ret) {
+		pr_err("pblk: could not recover L2P from OOB\n");
+		goto out;
+	}
+
+	if (!done) {
+		ret = pblk_recov_scan_all_oob(pblk, line, p);
+		if (ret) {
+			pr_err("pblk: could not recover L2P from OOB\n");
+			goto out;
+		}
+	}
+
+	if (pblk_line_is_full(line))
+		pblk_line_recov_close(pblk, line);
+
+out:
+	kfree(data);
+free_meta_list:
+	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+free_rqd:
+	pblk_free_rqd(pblk, rqd, READ);
+
+	return ret;
+}
+
+/* Insert lines ordered by sequence number (seq_num) on list */
+static void pblk_recov_line_add_ordered(struct list_head *head,
+					struct pblk_line *line)
+{
+	struct pblk_line *t = NULL;
+
+	list_for_each_entry(t, head, list)
+		if (t->seq_nr > line->seq_nr)
+			break;
+
+	__list_add(&line->list, t->list.prev, &t->list);
+}
+
+struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line, *tline, *data_line = NULL;
+	struct line_smeta *smeta;
+	struct line_emeta *emeta;
+	int found_lines = 0, recovered_lines = 0, open_lines = 0;
+	int is_next = 0;
+	int meta_line;
+	int i, valid_uuid = 0;
+	LIST_HEAD(recov_list);
+
+	/* TODO: Implement FTL snapshot */
+
+	/* Scan recovery - takes place when FTL snapshot fails */
+	spin_lock(&l_mg->free_lock);
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	smeta = l_mg->sline_meta[meta_line].meta;
+	emeta = l_mg->eline_meta[meta_line].meta;
+	spin_unlock(&l_mg->free_lock);
+
+	/* Order data lines using their sequence number */
+	for (i = 0; i < l_mg->nr_lines; i++) {
+		u32 crc;
+
+		line = &pblk->lines[i];
+
+		memset(smeta, 0, lm->smeta_len);
+		line->smeta = smeta;
+		line->lun_bitmap = ((void *)(smeta)) +
+						sizeof(struct line_smeta);
+
+		/* Lines that cannot be read are assumed as not written here */
+		if (pblk_line_read_smeta(pblk, line))
+			continue;
+
+		crc = pblk_calc_smeta_crc(pblk, smeta);
+		if (le32_to_cpu(smeta->crc) != crc)
+			continue;
+
+		if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+			continue;
+
+		if (le16_to_cpu(smeta->header.version) != 1) {
+			pr_err("pblk: found incompatible line version %u\n",
+					smeta->header.version);
+			return ERR_PTR(-EINVAL);
+		}
+
+		/* The first valid instance uuid is used for initialization */
+		if (!valid_uuid) {
+			memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+			valid_uuid = 1;
+		}
+
+		if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
+					i);
+			continue;
+		}
+
+		/* Update line metadata */
+		spin_lock(&line->lock);
+		line->id = le32_to_cpu(line->smeta->header.id);
+		line->type = le16_to_cpu(line->smeta->header.type);
+		line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+		spin_unlock(&line->lock);
+
+		/* Update general metadata */
+		spin_lock(&l_mg->free_lock);
+		if (line->seq_nr >= l_mg->d_seq_nr)
+			l_mg->d_seq_nr = line->seq_nr + 1;
+		l_mg->nr_free_lines--;
+		spin_unlock(&l_mg->free_lock);
+
+		if (pblk_line_recov_alloc(pblk, line))
+			goto out;
+
+		pblk_recov_line_add_ordered(&recov_list, line);
+		found_lines++;
+		pr_debug("pblk: recovering data line %d, seq:%llu\n",
+						line->id, smeta->seq_nr);
+	}
+
+	if (!found_lines) {
+		pblk_setup_uuid(pblk);
+
+		spin_lock(&l_mg->free_lock);
+		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
+							&l_mg->meta_bitmap));
+		spin_unlock(&l_mg->free_lock);
+
+		goto out;
+	}
+
+	/* Verify closed blocks and recover this portion of L2P table*/
+	list_for_each_entry_safe(line, tline, &recov_list, list) {
+		int off, nr_bb;
+
+		recovered_lines++;
+		/* Calculate where emeta starts based on the line bb */
+		off = lm->sec_per_line - lm->emeta_sec;
+		nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+		off -= nr_bb * geo->sec_per_pl;
+
+		memset(emeta, 0, lm->emeta_len);
+		line->emeta = emeta;
+		line->emeta_ssec = off;
+
+		if (pblk_line_read_emeta(pblk, line)) {
+			pblk_recov_l2p_from_oob(pblk, line);
+			goto next;
+		}
+
+		if (pblk_recov_l2p_from_emeta(pblk, line))
+			pblk_recov_l2p_from_oob(pblk, line);
+
+next:
+		if (pblk_line_is_full(line)) {
+			struct list_head *move_list;
+
+			spin_lock(&line->lock);
+			line->state = PBLK_LINESTATE_CLOSED;
+			move_list = pblk_line_gc_list(pblk, line);
+			spin_unlock(&line->lock);
+
+			spin_lock(&l_mg->gc_lock);
+			list_move_tail(&line->list, move_list);
+			spin_unlock(&l_mg->gc_lock);
+
+			mempool_free(line->map_bitmap, pblk->line_meta_pool);
+			line->map_bitmap = NULL;
+			line->smeta = NULL;
+			line->emeta = NULL;
+		} else {
+			if (open_lines > 1)
+				pr_err("pblk: failed to recover L2P\n");
+
+			open_lines++;
+			line->meta_line = meta_line;
+			data_line = line;
+		}
+	}
+
+	spin_lock(&l_mg->free_lock);
+	if (!open_lines) {
+		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
+							&l_mg->meta_bitmap));
+		pblk_line_replace_data(pblk);
+	} else {
+		/* Allocate next line for preparation */
+		l_mg->data_next = pblk_line_get(pblk);
+		if (l_mg->data_next) {
+			l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+			l_mg->data_next->type = PBLK_LINETYPE_DATA;
+			is_next = 1;
+		}
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	if (is_next) {
+		pblk_line_erase(pblk, l_mg->data_next);
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+	}
+
+out:
+	if (found_lines != recovered_lines)
+		pr_err("pblk: failed to recover all found lines %d/%d\n",
+						found_lines, recovered_lines);
+
+	return data_line;
+}
+
+/*
+ * Pad until smeta can be read on current data line
+ */
+void pblk_recov_pad(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *line;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct nvm_rq *rqd;
+	struct pblk_recov_alloc p;
+	struct ppa_addr *ppa_list;
+	struct pblk_sec_meta *meta_list;
+	void *data;
+	dma_addr_t dma_ppa_list, dma_meta_list;
+
+	spin_lock(&l_mg->free_lock);
+	line = l_mg->data_line;
+	spin_unlock(&l_mg->free_lock);
+
+	rqd = pblk_alloc_rqd(pblk, READ);
+	if (IS_ERR(rqd))
+		return;
+
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+	if (!meta_list)
+		goto free_rqd;
+
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+	data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
+	if (!data)
+		goto free_meta_list;
+
+	p.ppa_list = ppa_list;
+	p.meta_list = meta_list;
+	p.rqd = rqd;
+	p.data = data;
+	p.dma_ppa_list = dma_ppa_list;
+	p.dma_meta_list = dma_meta_list;
+
+	if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
+		pr_err("pblk: Tear down padding failed\n");
+		goto free_data;
+	}
+
+	pblk_line_close(pblk, line);
+
+free_data:
+	kfree(data);
+free_meta_list:
+	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+free_rqd:
+	pblk_free_rqd(pblk, rqd, READ);
+}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
new file mode 100644
index 000000000000..ab7cbb144f3f
--- /dev/null
+++ b/drivers/lightnvm/pblk-rl.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-rl.c - pblk's rate limiter for user I/O
+ *
+ */
+
+#include "pblk.h"
+
+static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
+{
+	mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
+}
+
+int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
+{
+	int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+
+	return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+}
+
+int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
+{
+	int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
+	int rb_user_active;
+
+	/* If there is no user I/O let GC take over space on the write buffer */
+	rb_user_active = READ_ONCE(rl->rb_user_active);
+	return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+}
+
+void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
+{
+	atomic_add(nr_entries, &rl->rb_user_cnt);
+
+	/* Release user I/O state. Protect from GC */
+	smp_store_release(&rl->rb_user_active, 1);
+	pblk_rl_kick_u_timer(rl);
+}
+
+void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
+{
+	atomic_add(nr_entries, &rl->rb_gc_cnt);
+}
+
+void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
+{
+	atomic_sub(nr_user, &rl->rb_user_cnt);
+	atomic_sub(nr_gc, &rl->rb_gc_cnt);
+}
+
+unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
+{
+	return atomic_read(&rl->free_blocks);
+}
+
+/*
+ * We check for (i) the number of free blocks in the current LUN and (ii) the
+ * total number of free blocks in the pblk instance. This is to even out the
+ * number of free blocks on each LUN when GC kicks in.
+ *
+ * Only the total number of free blocks is used to configure the rate limiter.
+ */
+static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
+{
+	unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
+
+	if (free_blocks >= rl->high) {
+		rl->rb_user_max = max - rl->rb_gc_rsv;
+		rl->rb_gc_max = rl->rb_gc_rsv;
+		rl->rb_state = PBLK_RL_HIGH;
+	} else if (free_blocks < rl->high) {
+		int shift = rl->high_pw - rl->rb_windows_pw;
+		int user_windows = free_blocks >> shift;
+		int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
+		int gc_max;
+
+		rl->rb_user_max = user_max;
+		gc_max = max - rl->rb_user_max;
+		rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
+
+		if (free_blocks > rl->low)
+			rl->rb_state = PBLK_RL_MID;
+		else
+			rl->rb_state = PBLK_RL_LOW;
+	}
+
+	return rl->rb_state;
+}
+
+void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
+{
+	rl->rb_gc_rsv = rl->rb_gc_max = rsv;
+}
+
+void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
+{
+	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	int blk_in_line = atomic_read(&line->blk_in_line);
+	int ret;
+
+	atomic_add(blk_in_line, &rl->free_blocks);
+	/* Rates will not change that often - no need to lock update */
+	ret = pblk_rl_update_rates(rl, rl->rb_budget);
+
+	if (ret == (PBLK_RL_MID | PBLK_RL_LOW))
+		pblk_gc_should_start(pblk);
+	else
+		pblk_gc_should_stop(pblk);
+}
+
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
+{
+	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	int blk_in_line = atomic_read(&line->blk_in_line);
+	int ret;
+
+	atomic_sub(blk_in_line, &rl->free_blocks);
+
+	/* Rates will not change that often - no need to lock update */
+	ret = pblk_rl_update_rates(rl, rl->rb_budget);
+	if (ret == (PBLK_RL_MID | PBLK_RL_LOW))
+		pblk_gc_should_start(pblk);
+	else
+		pblk_gc_should_stop(pblk);
+}
+
+int pblk_rl_gc_thrs(struct pblk_rl *rl)
+{
+	return rl->high;
+}
+
+int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
+{
+	return rl->rb_user_max;
+}
+
+static void pblk_rl_u_timer(unsigned long data)
+{
+	struct pblk_rl *rl = (struct pblk_rl *)data;
+
+	/* Release user I/O state. Protect from GC */
+	smp_store_release(&rl->rb_user_active, 0);
+}
+
+void pblk_rl_free(struct pblk_rl *rl)
+{
+	del_timer(&rl->u_timer);
+}
+
+void pblk_rl_init(struct pblk_rl *rl, int budget)
+{
+	unsigned int rb_windows;
+
+	rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
+	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+	rl->high_pw = get_count_order(rl->high);
+
+	/* This will always be a power-of-2 */
+	rb_windows = budget / PBLK_MAX_REQ_ADDRS;
+	rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+
+	/* To start with, all buffer is available to user I/O writers */
+	rl->rb_budget = budget;
+	rl->rb_user_max = budget;
+	atomic_set(&rl->rb_user_cnt, 0);
+	rl->rb_gc_max = 0;
+	rl->rb_state = PBLK_RL_HIGH;
+	atomic_set(&rl->rb_gc_cnt, 0);
+
+	setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+	rl->rb_user_active = 0;
+}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
new file mode 100644
index 000000000000..f0af1d1ceeff
--- /dev/null
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a physical block-device target for Open-channel SSDs.
+ *
+ * pblk-sysfs.c - pblk's sysfs
+ *
+ */
+
+#include "pblk.h"
+
+static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	ssize_t sz = 0;
+	int i;
+
+	for (i = 0; i < geo->nr_luns; i++) {
+		int active = 1;
+
+		rlun = &pblk->luns[i];
+		if (!down_trylock(&rlun->wr_sem)) {
+			active = 0;
+			up(&rlun->wr_sem);
+		}
+		sz += snprintf(page + sz, PAGE_SIZE - sz,
+				"pblk: pos:%d, ch:%d, lun:%d - %d\n",
+					i,
+					rlun->bppa.g.ch,
+					rlun->bppa.g.lun,
+					active);
+	}
+
+	return sz;
+}
+
+static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int free_blocks, total_blocks;
+	int rb_user_max, rb_user_cnt;
+	int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+
+	free_blocks = atomic_read(&pblk->rl.free_blocks);
+	rb_user_max = pblk->rl.rb_user_max;
+	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
+	rb_gc_max = pblk->rl.rb_gc_max;
+	rb_gc_rsv = pblk->rl.rb_gc_rsv;
+	rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
+	rb_budget = pblk->rl.rb_budget;
+	rb_state = pblk->rl.rb_state;
+
+	total_blocks = geo->blks_per_lun * geo->nr_luns;
+
+	return snprintf(page, PAGE_SIZE,
+		"u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+				rb_user_cnt,
+				rb_user_max,
+				rb_gc_cnt,
+				rb_gc_max,
+				rb_gc_rsv,
+				rb_state,
+				rb_budget,
+				pblk->rl.low,
+				pblk->rl.high,
+				free_blocks,
+				total_blocks,
+				READ_ONCE(pblk->rl.rb_user_active));
+}
+
+static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
+{
+	int gc_enabled, gc_active;
+
+	pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);
+	return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
+					gc_enabled, gc_active);
+}
+
+static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
+{
+	ssize_t sz;
+
+	sz = snprintf(page, PAGE_SIZE,
+			"read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
+			atomic_long_read(&pblk->read_failed),
+			atomic_long_read(&pblk->read_high_ecc),
+			atomic_long_read(&pblk->read_empty),
+			atomic_long_read(&pblk->read_failed_gc),
+			atomic_long_read(&pblk->write_failed),
+			atomic_long_read(&pblk->erase_failed));
+
+	return sz;
+}
+
+static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
+{
+	return pblk_rb_sysfs(&pblk->rwb, page);
+}
+
+static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	ssize_t sz = 0;
+
+	sz = snprintf(page, PAGE_SIZE - sz,
+		"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+		pblk->ppaf_bitsize,
+		pblk->ppaf.blk_offset, geo->ppaf.blk_len,
+		pblk->ppaf.pg_offset, geo->ppaf.pg_len,
+		pblk->ppaf.lun_offset, geo->ppaf.lun_len,
+		pblk->ppaf.ch_offset, geo->ppaf.ch_len,
+		pblk->ppaf.pln_offset, geo->ppaf.pln_len,
+		pblk->ppaf.sec_offset, geo->ppaf.sect_len);
+
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+		"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+		geo->ppaf.blk_offset, geo->ppaf.blk_len,
+		geo->ppaf.pg_offset, geo->ppaf.pg_len,
+		geo->ppaf.lun_offset, geo->ppaf.lun_len,
+		geo->ppaf.ch_offset, geo->ppaf.ch_len,
+		geo->ppaf.pln_offset, geo->ppaf.pln_len,
+		geo->ppaf.sect_offset, geo->ppaf.sect_len);
+
+	return sz;
+}
+
+static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line;
+	ssize_t sz = 0;
+	int nr_free_lines;
+	int cur_data, cur_log;
+	int free_line_cnt = 0, closed_line_cnt = 0;
+	int d_line_cnt = 0, l_line_cnt = 0;
+	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
+	int free = 0, bad = 0, cor = 0;
+	int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+	int map_weight = 0, meta_weight = 0;
+
+	spin_lock(&l_mg->free_lock);
+	cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
+	cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
+	nr_free_lines = l_mg->nr_free_lines;
+
+	list_for_each_entry(line, &l_mg->free_list, list)
+		free_line_cnt++;
+	spin_unlock(&l_mg->free_lock);
+
+	spin_lock(&l_mg->gc_lock);
+	list_for_each_entry(line, &l_mg->gc_full_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_full++;
+	}
+
+	list_for_each_entry(line, &l_mg->gc_high_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_high++;
+	}
+
+	list_for_each_entry(line, &l_mg->gc_mid_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_mid++;
+	}
+
+	list_for_each_entry(line, &l_mg->gc_low_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_low++;
+	}
+
+	list_for_each_entry(line, &l_mg->gc_empty_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_empty++;
+	}
+
+	list_for_each_entry(line, &l_mg->free_list, list)
+		free++;
+	list_for_each_entry(line, &l_mg->bad_list, list)
+		bad++;
+	list_for_each_entry(line, &l_mg->corrupt_list, list)
+		cor++;
+	spin_unlock(&l_mg->gc_lock);
+
+	spin_lock(&l_mg->free_lock);
+	if (l_mg->data_line) {
+		cur_sec = l_mg->data_line->cur_sec;
+		msecs = l_mg->data_line->left_msecs;
+		ssecs = l_mg->data_line->left_ssecs;
+		vsc = l_mg->data_line->vsc;
+		sec_in_line = l_mg->data_line->sec_in_line;
+		meta_weight = bitmap_weight(&l_mg->meta_bitmap,
+							PBLK_DATA_LINES);
+		map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
+							lm->sec_per_line);
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	if (nr_free_lines != free_line_cnt)
+		pr_err("pblk: corrupted free line list\n");
+
+	sz = snprintf(page, PAGE_SIZE - sz,
+		"line: nluns:%d, nblks:%d, nsecs:%d\n",
+		geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
+
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+		"lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+					cur_data, cur_log,
+					free, nr_free_lines, bad, cor,
+					closed_line_cnt,
+					d_line_cnt, l_line_cnt,
+					l_mg->nr_lines);
+
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
+			gc_full, gc_high, gc_mid, gc_low, gc_empty,
+			atomic_read(&pblk->gc.inflight_gc));
+
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+		"data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+			cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
+			map_weight, lm->sec_per_line, meta_weight);
+
+	return sz;
+}
+
+static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	ssize_t sz = 0;
+
+	sz = snprintf(page, PAGE_SIZE - sz,
+				"smeta - len:%d, secs:%d\n",
+					lm->smeta_len, lm->smeta_sec);
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+				"emeta - len:%d, sec:%d, bb_start:%d\n",
+					lm->emeta_len, lm->emeta_sec,
+					lm->emeta_bb);
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+				"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
+					lm->sec_bitmap_len,
+					lm->blk_bitmap_len,
+					lm->lun_bitmap_len);
+	sz += snprintf(page + sz, PAGE_SIZE - sz,
+				"blk_line:%d, sec_line:%d, sec_blk:%d\n",
+					lm->blk_per_line,
+					lm->sec_per_line,
+					geo->sec_per_blk);
+
+	return sz;
+}
+
+#ifdef CONFIG_NVM_DEBUG
+static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
+{
+	return snprintf(page, PAGE_SIZE,
+		"%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
+			atomic_long_read(&pblk->inflight_writes),
+			atomic_long_read(&pblk->inflight_reads),
+			atomic_long_read(&pblk->req_writes),
+			atomic_long_read(&pblk->nr_flush),
+			atomic_long_read(&pblk->padded_writes),
+			atomic_long_read(&pblk->padded_wb),
+			atomic_long_read(&pblk->sub_writes),
+			atomic_long_read(&pblk->sync_writes),
+			atomic_long_read(&pblk->compl_writes),
+			atomic_long_read(&pblk->recov_writes),
+			atomic_long_read(&pblk->recov_gc_writes),
+			atomic_long_read(&pblk->recov_gc_reads),
+			atomic_long_read(&pblk->sync_reads));
+}
+#endif
+
+static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
+				     size_t len)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	size_t c_len;
+	int value;
+
+	c_len = strcspn(page, "\n");
+	if (c_len >= len)
+		return -EINVAL;
+
+	if (kstrtouint(page, 0, &value))
+		return -EINVAL;
+
+	spin_lock(&gc->lock);
+	pblk_rl_set_gc_rsc(&pblk->rl, value);
+	spin_unlock(&gc->lock);
+
+	return len;
+}
+
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+				   size_t len)
+{
+	size_t c_len;
+	int force;
+
+	c_len = strcspn(page, "\n");
+	if (c_len >= len)
+		return -EINVAL;
+
+	if (kstrtouint(page, 0, &force))
+		return -EINVAL;
+
+	if (force < 0 || force > 1)
+		return -EINVAL;
+
+	pblk_gc_sysfs_force(pblk, force);
+
+	return len;
+}
+
+static struct attribute sys_write_luns = {
+	.name = "write_luns",
+	.mode = 0444,
+};
+
+static struct attribute sys_rate_limiter_attr = {
+	.name = "rate_limiter",
+	.mode = 0444,
+};
+
+static struct attribute sys_gc_state = {
+	.name = "gc_state",
+	.mode = 0444,
+};
+
+static struct attribute sys_errors_attr = {
+	.name = "errors",
+	.mode = 0444,
+};
+
+static struct attribute sys_rb_attr = {
+	.name = "write_buffer",
+	.mode = 0444,
+};
+
+static struct attribute sys_stats_ppaf_attr = {
+	.name = "ppa_format",
+	.mode = 0444,
+};
+
+static struct attribute sys_lines_attr = {
+	.name = "lines",
+	.mode = 0444,
+};
+
+static struct attribute sys_lines_info_attr = {
+	.name = "lines_info",
+	.mode = 0444,
+};
+
+static struct attribute sys_gc_force = {
+	.name = "gc_force",
+	.mode = 0200,
+};
+
+static struct attribute sys_gc_rl_max = {
+	.name = "gc_rl_max",
+	.mode = 0200,
+};
+
+#ifdef CONFIG_NVM_DEBUG
+static struct attribute sys_stats_debug_attr = {
+	.name = "stats",
+	.mode = 0444,
+};
+#endif
+
+static struct attribute *pblk_attrs[] = {
+	&sys_write_luns,
+	&sys_rate_limiter_attr,
+	&sys_errors_attr,
+	&sys_gc_state,
+	&sys_gc_force,
+	&sys_gc_rl_max,
+	&sys_rb_attr,
+	&sys_stats_ppaf_attr,
+	&sys_lines_attr,
+	&sys_lines_info_attr,
+#ifdef CONFIG_NVM_DEBUG
+	&sys_stats_debug_attr,
+#endif
+	NULL,
+};
+
+static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
+			       char *buf)
+{
+	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
+
+	if (strcmp(attr->name, "rate_limiter") == 0)
+		return pblk_sysfs_rate_limiter(pblk, buf);
+	else if (strcmp(attr->name, "write_luns") == 0)
+		return pblk_sysfs_luns_show(pblk, buf);
+	else if (strcmp(attr->name, "gc_state") == 0)
+		return pblk_sysfs_gc_state_show(pblk, buf);
+	else if (strcmp(attr->name, "errors") == 0)
+		return pblk_sysfs_stats(pblk, buf);
+	else if (strcmp(attr->name, "write_buffer") == 0)
+		return pblk_sysfs_write_buffer(pblk, buf);
+	else if (strcmp(attr->name, "ppa_format") == 0)
+		return pblk_sysfs_ppaf(pblk, buf);
+	else if (strcmp(attr->name, "lines") == 0)
+		return pblk_sysfs_lines(pblk, buf);
+	else if (strcmp(attr->name, "lines_info") == 0)
+		return pblk_sysfs_lines_info(pblk, buf);
+#ifdef CONFIG_NVM_DEBUG
+	else if (strcmp(attr->name, "stats") == 0)
+		return pblk_sysfs_stats_debug(pblk, buf);
+#endif
+	return 0;
+}
+
+static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
+				const char *buf, size_t len)
+{
+	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
+
+	if (strcmp(attr->name, "gc_rl_max") == 0)
+		return pblk_sysfs_rate_store(pblk, buf, len);
+	else if (strcmp(attr->name, "gc_force") == 0)
+		return pblk_sysfs_gc_force(pblk, buf, len);
+
+	return 0;
+}
+
+static const struct sysfs_ops pblk_sysfs_ops = {
+	.show = pblk_sysfs_show,
+	.store = pblk_sysfs_store,
+};
+
+static struct kobj_type pblk_ktype = {
+	.sysfs_ops	= &pblk_sysfs_ops,
+	.default_attrs	= pblk_attrs,
+};
+
+int pblk_sysfs_init(struct gendisk *tdisk)
+{
+	struct pblk *pblk = tdisk->private_data;
+	struct device *parent_dev = disk_to_dev(pblk->disk);
+	int ret;
+
+	ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
+					kobject_get(&parent_dev->kobj),
+					"%s", "pblk");
+	if (ret) {
+		pr_err("pblk: could not register %s/pblk\n",
+						tdisk->disk_name);
+		return ret;
+	}
+
+	kobject_uevent(&pblk->kobj, KOBJ_ADD);
+	return 0;
+}
+
+void pblk_sysfs_exit(struct gendisk *tdisk)
+{
+	struct pblk *pblk = tdisk->private_data;
+
+	kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
+	kobject_del(&pblk->kobj);
+	kobject_put(&pblk->kobj);
+}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
new file mode 100644
index 000000000000..aef6fd7c4a0c
--- /dev/null
+++ b/drivers/lightnvm/pblk-write.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * pblk-write.c - pblk's write path from write buffer to media
+ */
+
+#include "pblk.h"
+
+static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
+{
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_inc(&pblk->sync_writes);
+#endif
+
+	/* Counter protected by rb sync lock */
+	line->left_ssecs--;
+	if (!line->left_ssecs)
+		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
+}
+
+static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
+				    struct pblk_c_ctx *c_ctx)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct bio *original_bio;
+	unsigned long ret;
+	int i;
+
+	for (i = 0; i < c_ctx->nr_valid; i++) {
+		struct pblk_w_ctx *w_ctx;
+		struct ppa_addr p;
+		struct pblk_line *line;
+
+		w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
+
+		p = rqd->ppa_list[i];
+		line = &pblk->lines[pblk_dev_ppa_to_line(p)];
+		pblk_sync_line(pblk, line);
+
+		while ((original_bio = bio_list_pop(&w_ctx->bios)))
+			bio_endio(original_bio);
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+#endif
+
+	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
+
+	if (rqd->meta_list)
+		nvm_dev_dma_free(dev->parent, rqd->meta_list,
+							rqd->dma_meta_list);
+
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, WRITE);
+
+	return ret;
+}
+
+static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
+					   struct nvm_rq *rqd,
+					   struct pblk_c_ctx *c_ctx)
+{
+	list_del(&c_ctx->list);
+	return pblk_end_w_bio(pblk, rqd, c_ctx);
+}
+
+static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
+				struct pblk_c_ctx *c_ctx)
+{
+	struct pblk_c_ctx *c, *r;
+	unsigned long flags;
+	unsigned long pos;
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
+#endif
+
+	pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
+
+	pos = pblk_rb_sync_init(&pblk->rwb, &flags);
+	if (pos == c_ctx->sentry) {
+		pos = pblk_end_w_bio(pblk, rqd, c_ctx);
+
+retry:
+		list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
+			rqd = nvm_rq_from_c_ctx(c);
+			if (c->sentry == pos) {
+				pos = pblk_end_queued_w_bio(pblk, rqd, c);
+				goto retry;
+			}
+		}
+	} else {
+		WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
+		list_add_tail(&c_ctx->list, &pblk->compl_list);
+	}
+	pblk_rb_sync_end(&pblk->rwb, &flags);
+}
+
+/* When a write fails, we are not sure whether the block has grown bad or a page
+ * range is more susceptible to write errors. If a high number of pages fail, we
+ * assume that the block is bad and we mark it accordingly. In all cases, we
+ * remap and resubmit the failed entries as fast as possible; if a flush is
+ * waiting on a completion, the whole stack would stall otherwise.
+ */
+static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	void *comp_bits = &rqd->ppa_status;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_rec_ctx *recovery;
+	struct ppa_addr *ppa_list = rqd->ppa_list;
+	int nr_ppas = rqd->nr_ppas;
+	unsigned int c_entries;
+	int bit, ret;
+
+	if (unlikely(nr_ppas == 1))
+		ppa_list = &rqd->ppa_addr;
+
+	recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
+	if (!recovery) {
+		pr_err("pblk: could not allocate recovery context\n");
+		return;
+	}
+	INIT_LIST_HEAD(&recovery->failed);
+
+	bit = -1;
+	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
+		struct pblk_rb_entry *entry;
+		struct ppa_addr ppa;
+
+		/* Logic error */
+		if (bit > c_ctx->nr_valid) {
+			WARN_ONCE(1, "pblk: corrupted write request\n");
+			mempool_free(recovery, pblk->rec_pool);
+			goto out;
+		}
+
+		ppa = ppa_list[bit];
+		entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
+		if (!entry) {
+			pr_err("pblk: could not scan entry on write failure\n");
+			mempool_free(recovery, pblk->rec_pool);
+			goto out;
+		}
+
+		/* The list is filled first and emptied afterwards. No need for
+		 * protecting it with a lock
+		 */
+		list_add_tail(&entry->index, &recovery->failed);
+	}
+
+	c_entries = find_first_bit(comp_bits, nr_ppas);
+	ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
+	if (ret) {
+		pr_err("pblk: could not recover from write failure\n");
+		mempool_free(recovery, pblk->rec_pool);
+		goto out;
+	}
+
+	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
+	queue_work(pblk->kw_wq, &recovery->ws_rec);
+
+out:
+	pblk_complete_write(pblk, rqd, c_ctx);
+}
+
+static void pblk_end_io_write(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+
+	if (rqd->error) {
+		pblk_log_write_err(pblk, rqd);
+		return pblk_end_w_fail(pblk, rqd);
+	}
+#ifdef CONFIG_NVM_DEBUG
+	else
+		WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+#endif
+
+	pblk_complete_write(pblk, rqd, c_ctx);
+}
+
+static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			   unsigned int nr_secs)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+
+	/* Setup write request */
+	rqd->opcode = NVM_OP_PWRITE;
+	rqd->nr_ppas = nr_secs;
+	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+	rqd->private = pblk;
+	rqd->end_io = pblk_end_io_write;
+
+	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd->dma_meta_list);
+	if (!rqd->meta_list)
+		return -ENOMEM;
+
+	if (unlikely(nr_secs == 1))
+		return 0;
+
+	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
+
+	return 0;
+}
+
+static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			   struct pblk_c_ctx *c_ctx)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+	struct ppa_addr erase_ppa;
+	unsigned int valid = c_ctx->nr_valid;
+	unsigned int padded = c_ctx->nr_padded;
+	unsigned int nr_secs = valid + padded;
+	unsigned long *lun_bitmap;
+	int ret = 0;
+
+	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
+	if (!lun_bitmap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	c_ctx->lun_bitmap = lun_bitmap;
+
+	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+	if (ret) {
+		kfree(lun_bitmap);
+		goto out;
+	}
+
+	ppa_set_empty(&erase_ppa);
+	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
+		pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
+	else
+		pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
+							valid, &erase_ppa);
+
+out:
+	if (unlikely(e_line && !ppa_empty(erase_ppa))) {
+		if (pblk_blk_erase_async(pblk, erase_ppa)) {
+			struct nvm_tgt_dev *dev = pblk->dev;
+			struct nvm_geo *geo = &dev->geo;
+			int bit;
+
+			atomic_inc(&e_line->left_eblks);
+			bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
+			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+			up(&pblk->erase_sem);
+		}
+	}
+
+	return ret;
+}
+
+int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			struct pblk_c_ctx *c_ctx)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	unsigned long *lun_bitmap;
+	int ret;
+
+	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
+	if (!lun_bitmap)
+		return -ENOMEM;
+
+	c_ctx->lun_bitmap = lun_bitmap;
+
+	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+	if (ret)
+		return ret;
+
+	pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
+
+	rqd->ppa_status = (u64)0;
+	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+
+	return ret;
+}
+
+static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
+				  unsigned int secs_to_flush)
+{
+	int secs_to_sync;
+
+	secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+
+#ifdef CONFIG_NVM_DEBUG
+	if ((!secs_to_sync && secs_to_flush)
+			|| (secs_to_sync < 0)
+			|| (secs_to_sync > secs_avail && !secs_to_flush)) {
+		pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n",
+				secs_avail, secs_to_sync, secs_to_flush);
+	}
+#endif
+
+	return secs_to_sync;
+}
+
+static int pblk_submit_write(struct pblk *pblk)
+{
+	struct bio *bio;
+	struct nvm_rq *rqd;
+	struct pblk_c_ctx *c_ctx;
+	unsigned int pgs_read;
+	unsigned int secs_avail, secs_to_sync, secs_to_com;
+	unsigned int secs_to_flush;
+	unsigned long pos;
+	int err;
+
+	/* If there are no sectors in the cache, flushes (bios without data)
+	 * will be cleared on the cache threads
+	 */
+	secs_avail = pblk_rb_read_count(&pblk->rwb);
+	if (!secs_avail)
+		return 1;
+
+	secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb);
+	if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+		return 1;
+
+	rqd = pblk_alloc_rqd(pblk, WRITE);
+	if (IS_ERR(rqd)) {
+		pr_err("pblk: cannot allocate write req.\n");
+		return 1;
+	}
+	c_ctx = nvm_rq_to_pdu(rqd);
+
+	bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
+	if (!bio) {
+		pr_err("pblk: cannot allocate write bio\n");
+		goto fail_free_rqd;
+	}
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+	rqd->bio = bio;
+
+	secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
+	if (secs_to_sync > pblk->max_write_pgs) {
+		pr_err("pblk: bad buffer sync calculation\n");
+		goto fail_put_bio;
+	}
+
+	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
+	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
+
+	pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
+						secs_to_sync, secs_avail);
+	if (!pgs_read) {
+		pr_err("pblk: corrupted write bio\n");
+		goto fail_put_bio;
+	}
+
+	if (c_ctx->nr_padded)
+		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
+			goto fail_put_bio;
+
+	/* Assign lbas to ppas and populate request structure */
+	err = pblk_setup_w_rq(pblk, rqd, c_ctx);
+	if (err) {
+		pr_err("pblk: could not setup write request\n");
+		goto fail_free_bio;
+	}
+
+	err = pblk_submit_io(pblk, rqd);
+	if (err) {
+		pr_err("pblk: I/O submission failed: %d\n", err);
+		goto fail_free_bio;
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(secs_to_sync, &pblk->sub_writes);
+#endif
+
+	return 0;
+
+fail_free_bio:
+	if (c_ctx->nr_padded)
+		pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+fail_put_bio:
+	bio_put(bio);
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, WRITE);
+
+	return 1;
+}
+
+int pblk_write_ts(void *data)
+{
+	struct pblk *pblk = data;
+
+	while (!kthread_should_stop()) {
+		if (!pblk_submit_write(pblk))
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
+}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
new file mode 100644
index 000000000000..99f3186b5288
--- /dev/null
+++ b/drivers/lightnvm/pblk.h
@@ -0,0 +1,1121 @@
+/*
+ * Copyright (C) 2015 IT University of Copenhagen (rrpc.h)
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Matias Bjorling <matias@cnexlabs.com>
+ * Write buffering: Javier Gonzalez <javier@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Implementation of a Physical Block-device target for Open-channel SSDs.
+ *
+ */
+
+#ifndef PBLK_H_
+#define PBLK_H_
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/crc32.h>
+#include <linux/uuid.h>
+
+#include <linux/lightnvm.h>
+
+/* Run only GC if less than 1/X blocks are free */
+#define GC_LIMIT_INVERSE 5
+#define GC_TIME_MSECS 1000
+
+#define PBLK_SECTOR (512)
+#define PBLK_EXPOSED_PAGE_SIZE (4096)
+#define PBLK_MAX_REQ_ADDRS (64)
+#define PBLK_MAX_REQ_ADDRS_PW (6)
+
+#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
+
+#define PBLK_COMMAND_TIMEOUT_MS 30000
+
+/* Max 512 LUNs per device */
+#define PBLK_MAX_LUNS_BITMAP (4)
+
+#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
+
+#define pblk_for_each_lun(pblk, rlun, i) \
+		for ((i) = 0, rlun = &(pblk)->luns[0]; \
+			(i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)])
+
+#define ERASE 2 /* READ = 0, WRITE = 1 */
+
+enum {
+	/* IO Types */
+	PBLK_IOTYPE_USER	= 1 << 0,
+	PBLK_IOTYPE_GC		= 1 << 1,
+
+	/* Write buffer flags */
+	PBLK_FLUSH_ENTRY	= 1 << 2,
+	PBLK_WRITTEN_DATA	= 1 << 3,
+	PBLK_SUBMITTED_ENTRY	= 1 << 4,
+	PBLK_WRITABLE_ENTRY	= 1 << 5,
+};
+
+enum {
+	PBLK_BLK_ST_OPEN =	0x1,
+	PBLK_BLK_ST_CLOSED =	0x2,
+};
+
+/* The number of GC lists and the rate-limiter states go together. This way the
+ * rate-limiter can dictate how much GC is needed based on resource utilization.
+ */
+#define PBLK_NR_GC_LISTS 3
+#define PBLK_MAX_GC_JOBS 32
+
+enum {
+	PBLK_RL_HIGH = 1,
+	PBLK_RL_MID = 2,
+	PBLK_RL_LOW = 3,
+};
+
+struct pblk_sec_meta {
+	u64 reserved;
+	__le64 lba;
+};
+
+#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
+
+/* write completion context */
+struct pblk_c_ctx {
+	struct list_head list;		/* Head for out-of-order completion */
+
+	unsigned long *lun_bitmap;	/* Luns used on current request */
+	unsigned int sentry;
+	unsigned int nr_valid;
+	unsigned int nr_padded;
+};
+
+/* Read context */
+struct pblk_r_ctx {
+	struct bio *orig_bio;
+};
+
+/* Recovery context */
+struct pblk_rec_ctx {
+	struct pblk *pblk;
+	struct nvm_rq *rqd;
+	struct list_head failed;
+	struct work_struct ws_rec;
+};
+
+/* Write context */
+struct pblk_w_ctx {
+	struct bio_list bios;		/* Original bios - used for completion
+					 * in REQ_FUA, REQ_FLUSH case
+					 */
+	u64 lba;			/* Logic addr. associated with entry */
+	struct ppa_addr ppa;		/* Physic addr. associated with entry */
+	int flags;			/* Write context flags */
+};
+
+struct pblk_rb_entry {
+	struct ppa_addr cacheline;	/* Cacheline for this entry */
+	void *data;			/* Pointer to data on this entry */
+	struct pblk_w_ctx w_ctx;	/* Context for this entry */
+	struct list_head index;		/* List head to enable indexes */
+};
+
+#define EMPTY_ENTRY (~0U)
+
+struct pblk_rb_pages {
+	struct page *pages;
+	int order;
+	struct list_head list;
+};
+
+struct pblk_rb {
+	struct pblk_rb_entry *entries;	/* Ring buffer entries */
+	unsigned int mem;		/* Write offset - points to next
+					 * writable entry in memory
+					 */
+	unsigned int subm;		/* Read offset - points to last entry
+					 * that has been submitted to the media
+					 * to be persisted
+					 */
+	unsigned int sync;		/* Synced - backpointer that signals
+					 * the last submitted entry that has
+					 * been successfully persisted to media
+					 */
+	unsigned int sync_point;	/* Sync point - last entry that must be
+					 * flushed to the media. Used with
+					 * REQ_FLUSH and REQ_FUA
+					 */
+	unsigned int l2p_update;	/* l2p update point - next entry for
+					 * which l2p mapping will be updated to
+					 * contain a device ppa address (instead
+					 * of a cacheline
+					 */
+	unsigned int nr_entries;	/* Number of entries in write buffer -
+					 * must be a power of two
+					 */
+	unsigned int seg_size;		/* Size of the data segments being
+					 * stored on each entry. Typically this
+					 * will be 4KB
+					 */
+
+	struct list_head pages;		/* List of data pages */
+
+	spinlock_t w_lock;		/* Write lock */
+	spinlock_t s_lock;		/* Sync lock */
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_t inflight_sync_point;	/* Not served REQ_FLUSH | REQ_FUA */
+#endif
+};
+
+#define PBLK_RECOVERY_SECTORS 16
+
+struct pblk_lun {
+	struct ppa_addr bppa;
+
+	u8 *bb_list;			/* Bad block list for LUN. Only used on
+					 * bring up. Bad blocks are managed
+					 * within lines on run-time.
+					 */
+
+	struct semaphore wr_sem;
+};
+
+struct pblk_gc_rq {
+	struct pblk_line *line;
+	void *data;
+	u64 *lba_list;
+	int nr_secs;
+	int secs_to_gc;
+	struct list_head list;
+};
+
+struct pblk_gc {
+	int gc_active;
+	int gc_enabled;
+	int gc_forced;
+	int gc_jobs_active;
+	atomic_t inflight_gc;
+
+	struct task_struct *gc_ts;
+	struct task_struct *gc_writer_ts;
+	struct workqueue_struct *gc_reader_wq;
+	struct timer_list gc_timer;
+
+	int w_entries;
+	struct list_head w_list;
+
+	spinlock_t lock;
+	spinlock_t w_lock;
+};
+
+struct pblk_rl {
+	unsigned int high;	/* Upper threshold for rate limiter (free run -
+				 * user I/O rate limiter
+				 */
+	unsigned int low;	/* Lower threshold for rate limiter (user I/O
+				 * rate limiter - stall)
+				 */
+	unsigned int high_pw;	/* High rounded up as a power of 2 */
+
+#define PBLK_USER_HIGH_THRS 2	/* Begin write limit at 50 percent
+				 * available blks
+				 */
+#define PBLK_USER_LOW_THRS 20	/* Aggressive GC at 5% available blocks */
+
+	int rb_windows_pw;	/* Number of rate windows in the write buffer
+				 * given as a power-of-2. This guarantees that
+				 * when user I/O is being rate limited, there
+				 * will be reserved enough space for the GC to
+				 * place its payload. A window is of
+				 * pblk->max_write_pgs size, which in NVMe is
+				 * 64, i.e., 256kb.
+				 */
+	int rb_budget;		/* Total number of entries available for I/O */
+	int rb_user_max;	/* Max buffer entries available for user I/O */
+	atomic_t rb_user_cnt;	/* User I/O buffer counter */
+	int rb_gc_max;		/* Max buffer entries available for GC I/O */
+	int rb_gc_rsv;		/* Reserved buffer entries for GC I/O */
+	int rb_state;		/* Rate-limiter current state */
+	atomic_t rb_gc_cnt;	/* GC I/O buffer counter */
+
+	int rb_user_active;
+	struct timer_list u_timer;
+
+	unsigned long long nr_secs;
+	unsigned long total_blocks;
+	atomic_t free_blocks;
+};
+
+#define PBLK_LINE_NR_LUN_BITMAP 2
+#define PBLK_LINE_NR_SEC_BITMAP 2
+#define PBLK_LINE_EMPTY (~0U)
+
+enum {
+	/* Line Types */
+	PBLK_LINETYPE_FREE = 0,
+	PBLK_LINETYPE_LOG = 1,
+	PBLK_LINETYPE_DATA = 2,
+
+	/* Line state */
+	PBLK_LINESTATE_FREE = 10,
+	PBLK_LINESTATE_OPEN = 11,
+	PBLK_LINESTATE_CLOSED = 12,
+	PBLK_LINESTATE_GC = 13,
+	PBLK_LINESTATE_BAD = 14,
+	PBLK_LINESTATE_CORRUPT = 15,
+
+	/* GC group */
+	PBLK_LINEGC_NONE = 20,
+	PBLK_LINEGC_EMPTY = 21,
+	PBLK_LINEGC_LOW = 22,
+	PBLK_LINEGC_MID = 23,
+	PBLK_LINEGC_HIGH = 24,
+	PBLK_LINEGC_FULL = 25,
+};
+
+#define PBLK_MAGIC 0x70626c6b /*pblk*/
+
+struct line_header {
+	__le32 crc;
+	__le32 identifier;	/* pblk identifier */
+	__u8 uuid[16];		/* instance uuid */
+	__le16 type;		/* line type */
+	__le16 version;		/* type version */
+	__le32 id;		/* line id for current line */
+};
+
+struct line_smeta {
+	struct line_header header;
+
+	__le32 crc;		/* Full structure including struct crc */
+	/* Previous line metadata */
+	__le32 prev_id;		/* Line id for previous line */
+
+	/* Current line metadata */
+	__le64 seq_nr;		/* Sequence number for current line */
+
+	/* Active writers */
+	__le32 window_wr_lun;	/* Number of parallel LUNs to write */
+
+	__le32 rsvd[2];
+};
+
+/*
+ * Metadata Layout:
+ *	1. struct pblk_emeta
+ *	2. nr_lbas u64 forming lba list
+ *	3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
+ *	4. nr_luns bits (u64 format) forming line bad block bitmap
+ *
+ *	3. and 4. will be part of FTL log
+ */
+struct line_emeta {
+	struct line_header header;
+
+	__le32 crc;		/* Full structure including struct crc */
+
+	/* Previous line metadata */
+	__le32 prev_id;		/* Line id for prev line */
+
+	/* Current line metadata */
+	__le64 seq_nr;		/* Sequence number for current line */
+
+	/* Active writers */
+	__le32 window_wr_lun;	/* Number of parallel LUNs to write */
+
+	/* Bookkeeping for recovery */
+	__le32 next_id;		/* Line id for next line */
+	__le64 nr_lbas;		/* Number of lbas mapped in line */
+	__le64 nr_valid_lbas;	/* Number of valid lbas mapped in line */
+};
+
+struct pblk_line {
+	struct pblk *pblk;
+	unsigned int id;		/* Line number corresponds to the
+					 * block line
+					 */
+	unsigned int seq_nr;		/* Unique line sequence number */
+
+	int state;			/* PBLK_LINESTATE_X */
+	int type;			/* PBLK_LINETYPE_X */
+	int gc_group;			/* PBLK_LINEGC_X */
+	struct list_head list;		/* Free, GC lists */
+
+	unsigned long *lun_bitmap;	/* Bitmap for LUNs mapped in line */
+
+	struct line_smeta *smeta;	/* Start metadata */
+	struct line_emeta *emeta;	/* End metadata */
+	int meta_line;			/* Metadata line id */
+	u64 smeta_ssec;			/* Sector where smeta starts */
+	u64 emeta_ssec;			/* Sector where emeta starts */
+
+	unsigned int sec_in_line;	/* Number of usable secs in line */
+
+	atomic_t blk_in_line;		/* Number of good blocks in line */
+	unsigned long *blk_bitmap;	/* Bitmap for valid/invalid blocks */
+	unsigned long *erase_bitmap;	/* Bitmap for erased blocks */
+
+	unsigned long *map_bitmap;	/* Bitmap for mapped sectors in line */
+	unsigned long *invalid_bitmap;	/* Bitmap for invalid sectors in line */
+
+	atomic_t left_eblks;		/* Blocks left for erasing */
+	atomic_t left_seblks;		/* Blocks left for sync erasing */
+
+	int left_msecs;			/* Sectors left for mapping */
+	int left_ssecs;			/* Sectors left to sync */
+	unsigned int cur_sec;		/* Sector map pointer */
+	unsigned int vsc;		/* Valid sector count in line */
+
+	struct kref ref;		/* Write buffer L2P references */
+
+	spinlock_t lock;		/* Necessary for invalid_bitmap only */
+};
+
+#define PBLK_DATA_LINES 4
+
+enum{
+	PBLK_KMALLOC_META = 1,
+	PBLK_VMALLOC_META = 2,
+};
+
+struct pblk_line_metadata {
+	void *meta;
+};
+
+struct pblk_line_mgmt {
+	int nr_lines;			/* Total number of full lines */
+	int nr_free_lines;		/* Number of full lines in free list */
+
+	/* Free lists - use free_lock */
+	struct list_head free_list;	/* Full lines ready to use */
+	struct list_head corrupt_list;	/* Full lines corrupted */
+	struct list_head bad_list;	/* Full lines bad */
+
+	/* GC lists - use gc_lock */
+	struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+	struct list_head gc_high_list;	/* Full lines ready to GC, high isc */
+	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
+	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
+
+	struct list_head gc_full_list;	/* Full lines ready to GC, no valid */
+	struct list_head gc_empty_list;	/* Full lines close, all valid */
+
+	struct pblk_line *log_line;	/* Current FTL log line */
+	struct pblk_line *data_line;	/* Current data line */
+	struct pblk_line *log_next;	/* Next FTL log line */
+	struct pblk_line *data_next;	/* Next data line */
+
+	/* Metadata allocation type: VMALLOC | KMALLOC */
+	int smeta_alloc_type;
+	int emeta_alloc_type;
+
+	/* Pre-allocated metadata for data lines */
+	struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
+	struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+	unsigned long meta_bitmap;
+
+	/* Helpers for fast bitmap calculations */
+	unsigned long *bb_template;
+	unsigned long *bb_aux;
+
+	unsigned long d_seq_nr;		/* Data line unique sequence number */
+	unsigned long l_seq_nr;		/* Log line unique sequence number */
+
+	spinlock_t free_lock;
+	spinlock_t gc_lock;
+};
+
+struct pblk_line_meta {
+	unsigned int smeta_len;		/* Total length for smeta */
+	unsigned int smeta_sec;		/* Sectors needed for smeta*/
+	unsigned int emeta_len;		/* Total length for emeta */
+	unsigned int emeta_sec;		/* Sectors needed for emeta*/
+	unsigned int emeta_bb;		/* Boundary for bb that affects emeta */
+	unsigned int sec_bitmap_len;	/* Length for sector bitmap in line */
+	unsigned int blk_bitmap_len;	/* Length for block bitmap in line */
+	unsigned int lun_bitmap_len;	/* Length for lun bitmap in line */
+
+	unsigned int blk_per_line;	/* Number of blocks in a full line */
+	unsigned int sec_per_line;	/* Number of sectors in a line */
+	unsigned int min_blk_line;	/* Min. number of good blocks in line */
+
+	unsigned int mid_thrs;		/* Threshold for GC mid list */
+	unsigned int high_thrs;		/* Threshold for GC high list */
+};
+
+struct pblk_addr_format {
+	u64	ch_mask;
+	u64	lun_mask;
+	u64	pln_mask;
+	u64	blk_mask;
+	u64	pg_mask;
+	u64	sec_mask;
+	u8	ch_offset;
+	u8	lun_offset;
+	u8	pln_offset;
+	u8	blk_offset;
+	u8	pg_offset;
+	u8	sec_offset;
+};
+
+struct pblk {
+	struct nvm_tgt_dev *dev;
+	struct gendisk *disk;
+
+	struct kobject kobj;
+
+	struct pblk_lun *luns;
+
+	struct pblk_line *lines;		/* Line array */
+	struct pblk_line_mgmt l_mg;		/* Line management */
+	struct pblk_line_meta lm;		/* Line metadata */
+
+	int ppaf_bitsize;
+	struct pblk_addr_format ppaf;
+
+	struct pblk_rb rwb;
+
+	int min_write_pgs; /* Minimum amount of pages required by controller */
+	int max_write_pgs; /* Maximum amount of pages supported by controller */
+	int pgs_in_buffer; /* Number of pages that need to be held in buffer to
+			    * guarantee successful reads.
+			    */
+
+	sector_t capacity; /* Device capacity when bad blocks are subtracted */
+	int over_pct;      /* Percentage of device used for over-provisioning */
+
+	/* pblk provisioning values. Used by rate limiter */
+	struct pblk_rl rl;
+
+	struct semaphore erase_sem;
+
+	unsigned char instance_uuid[16];
+#ifdef CONFIG_NVM_DEBUG
+	/* All debug counters apply to 4kb sector I/Os */
+	atomic_long_t inflight_writes;	/* Inflight writes (user and gc) */
+	atomic_long_t padded_writes;	/* Sectors padded due to flush/fua */
+	atomic_long_t padded_wb;	/* Sectors padded in write buffer */
+	atomic_long_t nr_flush;		/* Number of flush/fua I/O */
+	atomic_long_t req_writes;	/* Sectors stored on write buffer */
+	atomic_long_t sub_writes;	/* Sectors submitted from buffer */
+	atomic_long_t sync_writes;	/* Sectors synced to media */
+	atomic_long_t compl_writes;	/* Sectors completed in write bio */
+	atomic_long_t inflight_reads;	/* Inflight sector read requests */
+	atomic_long_t sync_reads;	/* Completed sector read requests */
+	atomic_long_t recov_writes;	/* Sectors submitted from recovery */
+	atomic_long_t recov_gc_writes;	/* Sectors submitted from write GC */
+	atomic_long_t recov_gc_reads;	/* Sectors submitted from read GC */
+#endif
+
+	spinlock_t lock;
+
+	atomic_long_t read_failed;
+	atomic_long_t read_empty;
+	atomic_long_t read_high_ecc;
+	atomic_long_t read_failed_gc;
+	atomic_long_t write_failed;
+	atomic_long_t erase_failed;
+
+	struct task_struct *writer_ts;
+
+	/* Simple translation map of logical addresses to physical addresses.
+	 * The logical addresses is known by the host system, while the physical
+	 * addresses are used when writing to the disk block device.
+	 */
+	unsigned char *trans_map;
+	spinlock_t trans_lock;
+
+	struct list_head compl_list;
+
+	mempool_t *page_pool;
+	mempool_t *line_ws_pool;
+	mempool_t *rec_pool;
+	mempool_t *r_rq_pool;
+	mempool_t *w_rq_pool;
+	mempool_t *line_meta_pool;
+
+	struct workqueue_struct *kw_wq;
+	struct timer_list wtimer;
+
+	struct pblk_gc gc;
+};
+
+struct pblk_line_ws {
+	struct pblk *pblk;
+	struct pblk_line *line;
+	void *priv;
+	struct work_struct ws;
+};
+
+#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
+
+/*
+ * pblk ring buffer operations
+ */
+int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
+		 unsigned int power_size, unsigned int power_seg_sz);
+unsigned int pblk_rb_calculate_size(unsigned int nr_entries);
+void *pblk_rb_entries_ref(struct pblk_rb *rb);
+int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
+			   unsigned int nr_entries, unsigned int *pos);
+int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
+			 unsigned int *pos);
+void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
+			      struct pblk_w_ctx w_ctx, unsigned int pos);
+void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
+			    struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
+			    unsigned int pos);
+struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+
+void pblk_rb_sync_l2p(struct pblk_rb *rb);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
+				 struct pblk_c_ctx *c_ctx,
+				 unsigned int pos,
+				 unsigned int nr_entries,
+				 unsigned int count);
+unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
+				      struct list_head *list,
+				      unsigned int max);
+int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
+			u64 pos, int bio_iter);
+unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
+
+unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
+unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
+struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
+					      struct ppa_addr *ppa);
+void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
+unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
+
+unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
+
+int pblk_rb_tear_down_check(struct pblk_rb *rb);
+int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
+void pblk_rb_data_free(struct pblk_rb *rb);
+ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
+
+/*
+ * pblk core
+ */
+struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			struct pblk_c_ctx *c_ctx);
+void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
+void pblk_flush_writer(struct pblk *pblk);
+struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
+void pblk_discard(struct pblk *pblk, struct bio *bio);
+void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
+			      unsigned int nr_secs, unsigned int len,
+			      gfp_t gfp_mask);
+struct pblk_line *pblk_line_get(struct pblk *pblk);
+struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
+struct pblk_line *pblk_line_get_data(struct pblk *pblk);
+struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_is_full(struct pblk_line *line);
+void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_mark_bb(struct work_struct *work);
+void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
+		      void (*work)(struct work_struct *));
+u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
+void pblk_line_put(struct kref *ref);
+struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
+		   unsigned long secs_to_flush);
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap);
+void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		unsigned long *lun_bitmap);
+void pblk_end_bio_sync(struct bio *bio);
+void pblk_end_io_sync(struct nvm_rq *rqd);
+int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
+		       int nr_pages);
+void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
+			     u64 paddr);
+void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
+			 int nr_pages);
+void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
+void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
+			   struct ppa_addr ppa);
+void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
+			 struct ppa_addr ppa, struct ppa_addr entry_line);
+int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+		       struct pblk_line *gc_line);
+void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
+			  u64 *lba_list, int nr_secs);
+void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+			 sector_t blba, int nr_secs);
+
+/*
+ * pblk user I/O write path
+ */
+int pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+			unsigned long flags);
+int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
+			   unsigned int nr_entries, unsigned int nr_rec_entries,
+			   struct pblk_line *gc_line, unsigned long flags);
+
+/*
+ * pblk map
+ */
+void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
+		       unsigned int sentry, unsigned long *lun_bitmap,
+		       unsigned int valid_secs, struct ppa_addr *erase_ppa);
+void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
+		 unsigned long *lun_bitmap, unsigned int valid_secs,
+		 unsigned int off);
+
+/*
+ * pblk write thread
+ */
+int pblk_write_ts(void *data);
+void pblk_write_timer_fn(unsigned long data);
+void pblk_write_should_kick(struct pblk *pblk);
+
+/*
+ * pblk read path
+ */
+int pblk_submit_read(struct pblk *pblk, struct bio *bio);
+int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
+			unsigned int nr_secs, unsigned int *secs_to_gc,
+			struct pblk_line *line);
+/*
+ * pblk recovery
+ */
+void pblk_submit_rec(struct work_struct *work);
+struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
+void pblk_recov_pad(struct pblk *pblk);
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
+int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
+			struct pblk_rec_ctx *recovery, u64 *comp_bits,
+			unsigned int comp);
+
+/*
+ * pblk gc
+ */
+#define PBLK_GC_TRIES 3
+
+int pblk_gc_init(struct pblk *pblk);
+void pblk_gc_exit(struct pblk *pblk);
+void pblk_gc_should_start(struct pblk *pblk);
+void pblk_gc_should_stop(struct pblk *pblk);
+int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
+			      int *gc_active);
+void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+
+/*
+ * pblk rate limiter
+ */
+void pblk_rl_init(struct pblk_rl *rl, int budget);
+void pblk_rl_free(struct pblk_rl *rl);
+int pblk_rl_gc_thrs(struct pblk_rl *rl);
+unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
+int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
+int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
+void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
+int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
+void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+
+/*
+ * pblk sysfs
+ */
+int pblk_sysfs_init(struct gendisk *tdisk);
+void pblk_sysfs_exit(struct gendisk *tdisk);
+
+static inline void *pblk_malloc(size_t size, int type, gfp_t flags)
+{
+	if (type == PBLK_KMALLOC_META)
+		return kmalloc(size, flags);
+	return vmalloc(size);
+}
+
+static inline void pblk_mfree(void *ptr, int type)
+{
+	if (type == PBLK_KMALLOC_META)
+		kfree(ptr);
+	else
+		vfree(ptr);
+}
+
+static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
+{
+	return c_ctx - sizeof(struct nvm_rq);
+}
+
+static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+{
+	return (emeta) + 1;
+}
+
+#define NVM_MEM_PAGE_WRITE (8)
+
+static inline int pblk_pad_distance(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl;
+}
+
+static inline int pblk_dev_ppa_to_line(struct ppa_addr p)
+{
+	return p.g.blk;
+}
+
+static inline int pblk_tgt_ppa_to_line(struct ppa_addr p)
+{
+	return p.g.blk;
+}
+
+static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
+{
+	return p.g.lun * geo->nr_chnls + p.g.ch;
+}
+
+/* A block within a line corresponds to the lun */
+static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
+{
+	return p.g.lun * geo->nr_chnls + p.g.ch;
+}
+
+static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
+{
+	struct ppa_addr ppa64;
+
+	ppa64.ppa = 0;
+
+	if (ppa32 == -1) {
+		ppa64.ppa = ADDR_EMPTY;
+	} else if (ppa32 & (1U << 31)) {
+		ppa64.c.line = ppa32 & ((~0U) >> 1);
+		ppa64.c.is_cached = 1;
+	} else {
+		ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >>
+							pblk->ppaf.blk_offset;
+		ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >>
+							pblk->ppaf.pg_offset;
+		ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >>
+							pblk->ppaf.lun_offset;
+		ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >>
+							pblk->ppaf.ch_offset;
+		ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >>
+							pblk->ppaf.pln_offset;
+		ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >>
+							pblk->ppaf.sec_offset;
+	}
+
+	return ppa64;
+}
+
+static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
+								sector_t lba)
+{
+	struct ppa_addr ppa;
+
+	if (pblk->ppaf_bitsize < 32) {
+		u32 *map = (u32 *)pblk->trans_map;
+
+		ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
+	} else {
+		struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map;
+
+		ppa = map[lba];
+	}
+
+	return ppa;
+}
+
+static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
+{
+	u32 ppa32 = 0;
+
+	if (ppa64.ppa == ADDR_EMPTY) {
+		ppa32 = ~0U;
+	} else if (ppa64.c.is_cached) {
+		ppa32 |= ppa64.c.line;
+		ppa32 |= 1U << 31;
+	} else {
+		ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset;
+		ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset;
+		ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset;
+		ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset;
+		ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset;
+		ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset;
+	}
+
+	return ppa32;
+}
+
+static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
+						struct ppa_addr ppa)
+{
+	if (pblk->ppaf_bitsize < 32) {
+		u32 *map = (u32 *)pblk->trans_map;
+
+		map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
+	} else {
+		u64 *map = (u64 *)pblk->trans_map;
+
+		map[lba] = ppa.ppa;
+	}
+}
+
+static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
+							struct ppa_addr p)
+{
+	u64 paddr;
+
+	paddr = 0;
+	paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset;
+	paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset;
+	paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset;
+	paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset;
+	paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset;
+
+	return paddr;
+}
+
+static inline int pblk_ppa_empty(struct ppa_addr ppa_addr)
+{
+	return (ppa_addr.ppa == ADDR_EMPTY);
+}
+
+static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
+{
+	ppa_addr->ppa = ADDR_EMPTY;
+}
+
+static inline int pblk_addr_in_cache(struct ppa_addr ppa)
+{
+	return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
+}
+
+static inline int pblk_addr_to_cacheline(struct ppa_addr ppa)
+{
+	return ppa.c.line;
+}
+
+static inline struct ppa_addr pblk_cacheline_to_addr(int addr)
+{
+	struct ppa_addr p;
+
+	p.c.line = addr;
+	p.c.is_cached = 1;
+
+	return p;
+}
+
+static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
+					      u64 line_id)
+{
+	struct ppa_addr ppa;
+
+	ppa.ppa = 0;
+	ppa.g.blk = line_id;
+	ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
+	ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
+	ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
+	ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
+	ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
+
+	return ppa;
+}
+
+static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
+					 u64 line_id)
+{
+	struct ppa_addr ppa;
+
+	ppa = addr_to_gen_ppa(pblk, paddr, line_id);
+
+	return ppa;
+}
+
+static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
+					    struct line_smeta *smeta)
+{
+	u32 crc = ~(u32)0;
+
+	crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+				sizeof(struct line_header) - sizeof(crc));
+
+	return crc;
+}
+
+static inline u32 pblk_calc_smeta_crc(struct pblk *pblk,
+				      struct line_smeta *smeta)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	u32 crc = ~(u32)0;
+
+	crc = crc32_le(crc, (unsigned char *)smeta +
+				sizeof(struct line_header) + sizeof(crc),
+				lm->smeta_len -
+				sizeof(struct line_header) - sizeof(crc));
+
+	return crc;
+}
+
+static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
+				      struct line_emeta *emeta)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	u32 crc = ~(u32)0;
+
+	crc = crc32_le(crc, (unsigned char *)emeta +
+				sizeof(struct line_header) + sizeof(crc),
+				lm->emeta_len -
+				sizeof(struct line_header) - sizeof(crc));
+
+	return crc;
+}
+
+static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int flags;
+
+	flags = geo->plane_mode >> 1;
+
+	if (type == WRITE)
+		flags |= NVM_IO_SCRAMBLE_ENABLE;
+
+	return flags;
+}
+
+static inline int pblk_set_read_mode(struct pblk *pblk)
+{
+	return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+}
+
+#ifdef CONFIG_NVM_DEBUG
+static inline void print_ppa(struct ppa_addr *p, char *msg, int error)
+{
+	if (p->c.is_cached) {
+		pr_err("ppa: (%s: %x) cache line: %llu\n",
+				msg, error, (u64)p->c.line);
+	} else {
+		pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
+			msg, error,
+			p->g.ch, p->g.lun, p->g.blk,
+			p->g.pg, p->g.pl, p->g.sec);
+	}
+}
+
+static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
+					 int error)
+{
+	int bit = -1;
+
+	if (rqd->nr_ppas ==  1) {
+		print_ppa(&rqd->ppa_addr, "rqd", error);
+		return;
+	}
+
+	while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
+						bit + 1)) < rqd->nr_ppas) {
+		print_ppa(&rqd->ppa_list[bit], "rqd", error);
+	}
+
+	pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
+}
+#endif
+
+static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
+				       struct ppa_addr *ppas, int nr_ppas)
+{
+	struct nvm_geo *geo = &tgt_dev->geo;
+	struct ppa_addr *ppa;
+	int i;
+
+	for (i = 0; i < nr_ppas; i++) {
+		ppa = &ppas[i];
+
+		if (!ppa->c.is_cached &&
+				ppa->g.ch < geo->nr_chnls &&
+				ppa->g.lun < geo->luns_per_chnl &&
+				ppa->g.pl < geo->nr_planes &&
+				ppa->g.blk < geo->blks_per_lun &&
+				ppa->g.pg < geo->pgs_per_blk &&
+				ppa->g.sec < geo->sec_per_pg)
+			continue;
+
+#ifdef CONFIG_NVM_DEBUG
+		print_ppa(ppa, "boundary", i);
+#endif
+		return 1;
+	}
+	return 0;
+}
+
+static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+
+	if (paddr > lm->sec_per_line)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned int pblk_get_bi_idx(struct bio *bio)
+{
+	return bio->bi_iter.bi_idx;
+}
+
+static inline sector_t pblk_get_lba(struct bio *bio)
+{
+	return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
+}
+
+static inline unsigned int pblk_get_secs(struct bio *bio)
+{
+	return  bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE;
+}
+
+static inline sector_t pblk_get_sector(sector_t lba)
+{
+	return lba * NR_PHY_IN_LOG;
+}
+
+static inline void pblk_setup_uuid(struct pblk *pblk)
+{
+	uuid_le uuid;
+
+	uuid_le_gen(&uuid);
+	memcpy(pblk->instance_uuid, uuid.b, 16);
+}
+#endif /* PBLK_H_ */
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index e00b1d7b976f..cf0e28a0ff61 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -318,10 +318,6 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 	}
 
 	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
-	if (!page) {
-		bio_put(bio);
-		return -ENOMEM;
-	}
 
 	while ((slot = find_first_zero_bit(rblk->invalid_pages,
 					    nr_sec_per_blk)) < nr_sec_per_blk) {
@@ -414,7 +410,6 @@ static void rrpc_block_gc(struct work_struct *work)
 	struct rrpc *rrpc = gcb->rrpc;
 	struct rrpc_block *rblk = gcb->rblk;
 	struct rrpc_lun *rlun = rblk->rlun;
-	struct nvm_tgt_dev *dev = rrpc->dev;
 	struct ppa_addr ppa;
 
 	mempool_free(gcb, rrpc->gcb_pool);
@@ -430,7 +425,7 @@ static void rrpc_block_gc(struct work_struct *work)
 	ppa.g.lun = rlun->bppa.g.lun;
 	ppa.g.blk = rblk->id;
 
-	if (nvm_erase_blk(dev, &ppa, 0))
+	if (nvm_erase_sync(rrpc->dev, &ppa, 1))
 		goto put_back;
 
 	rrpc_put_blk(rrpc, rblk);
@@ -822,7 +817,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 
 	for (i = 0; i < npages; i++) {
 		/* We assume that mapping occurs at 4KB granularity */
-		BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+		BUG_ON(!(laddr + i < rrpc->nr_sects));
 		gp = &rrpc->trans_map[laddr + i];
 
 		if (gp->rblk) {
@@ -851,7 +846,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
 	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
 		return NVM_IO_REQUEUE;
 
-	BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+	BUG_ON(!(laddr < rrpc->nr_sects));
 	gp = &rrpc->trans_map[laddr];
 
 	if (gp->rblk) {
@@ -1007,11 +1002,6 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	}
 
 	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
-	if (!rqd) {
-		pr_err_ratelimited("rrpc: not able to queue bio.");
-		bio_io_error(bio);
-		return BLK_QC_T_NONE;
-	}
 	memset(rqd, 0, sizeof(struct nvm_rq));
 
 	err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
@@ -1275,8 +1265,10 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
 	}
 
 	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
-	if (nr_blks < 0)
-		return nr_blks;
+	if (nr_blks < 0) {
+		ret = nr_blks;
+		goto out;
+	}
 
 	for (i = 0; i < nr_blks; i++) {
 		if (blks[i] == NVM_BLK_T_FREE)
@@ -1514,7 +1506,8 @@ err:
 
 static struct nvm_tgt_type tt_rrpc;
 
-static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
+static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
+		       int flags)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index ceff415f201c..ee1a3d9147ef 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -144,12 +144,22 @@ config XGENE_SLIMPRO_MBOX
 	  want to use the APM X-Gene SLIMpro IPCM support.
 
 config BCM_PDC_MBOX
-	tristate "Broadcom PDC Mailbox"
-	depends on ARM64 || COMPILE_TEST
+	tristate "Broadcom FlexSparx DMA Mailbox"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	depends on HAS_DMA
+	help
+	  Mailbox implementation for the Broadcom FlexSparx DMA ring manager,
+	  which provides access to various offload engines on Broadcom
+	  SoCs, including FA2/FA+ on Northstar Plus and PDC on Northstar 2.
+
+config BCM_FLEXRM_MBOX
+	tristate "Broadcom FlexRM Mailbox"
+	depends on ARM64
+	depends on HAS_DMA
+	select GENERIC_MSI_IRQ_DOMAIN
 	default ARCH_BCM_IPROC
 	help
-	  Mailbox implementation for the Broadcom PDC ring manager,
+	  Mailbox implementation of the Broadcom FlexRM ring manager,
 	  which provides access to various offload engines on Broadcom
-	  SoCs. Say Y here if you want to use the Broadcom PDC.
+	  SoCs. Say Y here if you want to use the Broadcom FlexRM.
 endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 7dde4f609ae8..e2bcb03cd35b 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -30,4 +30,6 @@ obj-$(CONFIG_HI6220_MBOX)	+= hi6220-mailbox.o
 
 obj-$(CONFIG_BCM_PDC_MBOX)	+= bcm-pdc-mailbox.o
 
+obj-$(CONFIG_BCM_FLEXRM_MBOX)	+= bcm-flexrm-mailbox.o
+
 obj-$(CONFIG_TEGRA_HSP_MBOX)	+= tegra-hsp.o
diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c
new file mode 100644
index 000000000000..da67882caa7b
--- /dev/null
+++ b/drivers/mailbox/bcm-flexrm-mailbox.c
@@ -0,0 +1,1595 @@
+/* Broadcom FlexRM Mailbox Driver
+ *
+ * Copyright (C) 2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Each Broadcom FlexSparx4 offload engine is implemented as an
+ * extension to Broadcom FlexRM ring manager. The FlexRM ring
+ * manager provides a set of rings which can be used to submit
+ * work to a FlexSparx4 offload engine.
+ *
+ * This driver creates a mailbox controller using a set of FlexRM
+ * rings where each mailbox channel represents a separate FlexRM ring.
+ */
+
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox/brcm-message.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+/* ====== FlexRM register defines ===== */
+
+/* FlexRM configuration */
+#define RING_REGS_SIZE					0x10000
+#define RING_DESC_SIZE					8
+#define RING_DESC_INDEX(offset)				\
+			((offset) / RING_DESC_SIZE)
+#define RING_DESC_OFFSET(index)				\
+			((index) * RING_DESC_SIZE)
+#define RING_MAX_REQ_COUNT				1024
+#define RING_BD_ALIGN_ORDER				12
+#define RING_BD_ALIGN_CHECK(addr)			\
+			(!((addr) & ((0x1 << RING_BD_ALIGN_ORDER) - 1)))
+#define RING_BD_TOGGLE_INVALID(offset)			\
+			(((offset) >> RING_BD_ALIGN_ORDER) & 0x1)
+#define RING_BD_TOGGLE_VALID(offset)			\
+			(!RING_BD_TOGGLE_INVALID(offset))
+#define RING_BD_DESC_PER_REQ				32
+#define RING_BD_DESC_COUNT				\
+			(RING_MAX_REQ_COUNT * RING_BD_DESC_PER_REQ)
+#define RING_BD_SIZE					\
+			(RING_BD_DESC_COUNT * RING_DESC_SIZE)
+#define RING_CMPL_ALIGN_ORDER				13
+#define RING_CMPL_DESC_COUNT				RING_MAX_REQ_COUNT
+#define RING_CMPL_SIZE					\
+			(RING_CMPL_DESC_COUNT * RING_DESC_SIZE)
+#define RING_VER_MAGIC					0x76303031
+
+/* Per-Ring register offsets */
+#define RING_VER					0x000
+#define RING_BD_START_ADDR				0x004
+#define RING_BD_READ_PTR				0x008
+#define RING_BD_WRITE_PTR				0x00c
+#define RING_BD_READ_PTR_DDR_LS				0x010
+#define RING_BD_READ_PTR_DDR_MS				0x014
+#define RING_CMPL_START_ADDR				0x018
+#define RING_CMPL_WRITE_PTR				0x01c
+#define RING_NUM_REQ_RECV_LS				0x020
+#define RING_NUM_REQ_RECV_MS				0x024
+#define RING_NUM_REQ_TRANS_LS				0x028
+#define RING_NUM_REQ_TRANS_MS				0x02c
+#define RING_NUM_REQ_OUTSTAND				0x030
+#define RING_CONTROL					0x034
+#define RING_FLUSH_DONE					0x038
+#define RING_MSI_ADDR_LS				0x03c
+#define RING_MSI_ADDR_MS				0x040
+#define RING_MSI_CONTROL				0x048
+#define RING_BD_READ_PTR_DDR_CONTROL			0x04c
+#define RING_MSI_DATA_VALUE				0x064
+
+/* Register RING_BD_START_ADDR fields */
+#define BD_LAST_UPDATE_HW_SHIFT				28
+#define BD_LAST_UPDATE_HW_MASK				0x1
+#define BD_START_ADDR_VALUE(pa)				\
+	((u32)((((dma_addr_t)(pa)) >> RING_BD_ALIGN_ORDER) & 0x0fffffff))
+#define BD_START_ADDR_DECODE(val)			\
+	((dma_addr_t)((val) & 0x0fffffff) << RING_BD_ALIGN_ORDER)
+
+/* Register RING_CMPL_START_ADDR fields */
+#define CMPL_START_ADDR_VALUE(pa)			\
+	((u32)((((u64)(pa)) >> RING_CMPL_ALIGN_ORDER) & 0x03ffffff))
+
+/* Register RING_CONTROL fields */
+#define CONTROL_MASK_DISABLE_CONTROL			12
+#define CONTROL_FLUSH_SHIFT				5
+#define CONTROL_ACTIVE_SHIFT				4
+#define CONTROL_RATE_ADAPT_MASK				0xf
+#define CONTROL_RATE_DYNAMIC				0x0
+#define CONTROL_RATE_FAST				0x8
+#define CONTROL_RATE_MEDIUM				0x9
+#define CONTROL_RATE_SLOW				0xa
+#define CONTROL_RATE_IDLE				0xb
+
+/* Register RING_FLUSH_DONE fields */
+#define FLUSH_DONE_MASK					0x1
+
+/* Register RING_MSI_CONTROL fields */
+#define MSI_TIMER_VAL_SHIFT				16
+#define MSI_TIMER_VAL_MASK				0xffff
+#define MSI_ENABLE_SHIFT				15
+#define MSI_ENABLE_MASK					0x1
+#define MSI_COUNT_SHIFT					0
+#define MSI_COUNT_MASK					0x3ff
+
+/* Register RING_BD_READ_PTR_DDR_CONTROL fields */
+#define BD_READ_PTR_DDR_TIMER_VAL_SHIFT			16
+#define BD_READ_PTR_DDR_TIMER_VAL_MASK			0xffff
+#define BD_READ_PTR_DDR_ENABLE_SHIFT			15
+#define BD_READ_PTR_DDR_ENABLE_MASK			0x1
+
+/* ====== FlexRM ring descriptor defines ===== */
+
+/* Completion descriptor format */
+#define CMPL_OPAQUE_SHIFT			0
+#define CMPL_OPAQUE_MASK			0xffff
+#define CMPL_ENGINE_STATUS_SHIFT		16
+#define CMPL_ENGINE_STATUS_MASK			0xffff
+#define CMPL_DME_STATUS_SHIFT			32
+#define CMPL_DME_STATUS_MASK			0xffff
+#define CMPL_RM_STATUS_SHIFT			48
+#define CMPL_RM_STATUS_MASK			0xffff
+
+/* Completion DME status code */
+#define DME_STATUS_MEM_COR_ERR			BIT(0)
+#define DME_STATUS_MEM_UCOR_ERR			BIT(1)
+#define DME_STATUS_FIFO_UNDERFLOW		BIT(2)
+#define DME_STATUS_FIFO_OVERFLOW		BIT(3)
+#define DME_STATUS_RRESP_ERR			BIT(4)
+#define DME_STATUS_BRESP_ERR			BIT(5)
+#define DME_STATUS_ERROR_MASK			(DME_STATUS_MEM_COR_ERR | \
+						 DME_STATUS_MEM_UCOR_ERR | \
+						 DME_STATUS_FIFO_UNDERFLOW | \
+						 DME_STATUS_FIFO_OVERFLOW | \
+						 DME_STATUS_RRESP_ERR | \
+						 DME_STATUS_BRESP_ERR)
+
+/* Completion RM status code */
+#define RM_STATUS_CODE_SHIFT			0
+#define RM_STATUS_CODE_MASK			0x3ff
+#define RM_STATUS_CODE_GOOD			0x0
+#define RM_STATUS_CODE_AE_TIMEOUT		0x3ff
+
+/* General descriptor format */
+#define DESC_TYPE_SHIFT				60
+#define DESC_TYPE_MASK				0xf
+#define DESC_PAYLOAD_SHIFT			0
+#define DESC_PAYLOAD_MASK			0x0fffffffffffffff
+
+/* Null descriptor format  */
+#define NULL_TYPE				0
+#define NULL_TOGGLE_SHIFT			58
+#define NULL_TOGGLE_MASK			0x1
+
+/* Header descriptor format */
+#define HEADER_TYPE				1
+#define HEADER_TOGGLE_SHIFT			58
+#define HEADER_TOGGLE_MASK			0x1
+#define HEADER_ENDPKT_SHIFT			57
+#define HEADER_ENDPKT_MASK			0x1
+#define HEADER_STARTPKT_SHIFT			56
+#define HEADER_STARTPKT_MASK			0x1
+#define HEADER_BDCOUNT_SHIFT			36
+#define HEADER_BDCOUNT_MASK			0x1f
+#define HEADER_BDCOUNT_MAX			HEADER_BDCOUNT_MASK
+#define HEADER_FLAGS_SHIFT			16
+#define HEADER_FLAGS_MASK			0xffff
+#define HEADER_OPAQUE_SHIFT			0
+#define HEADER_OPAQUE_MASK			0xffff
+
+/* Source (SRC) descriptor format */
+#define SRC_TYPE				2
+#define SRC_LENGTH_SHIFT			44
+#define SRC_LENGTH_MASK				0xffff
+#define SRC_ADDR_SHIFT				0
+#define SRC_ADDR_MASK				0x00000fffffffffff
+
+/* Destination (DST) descriptor format */
+#define DST_TYPE				3
+#define DST_LENGTH_SHIFT			44
+#define DST_LENGTH_MASK				0xffff
+#define DST_ADDR_SHIFT				0
+#define DST_ADDR_MASK				0x00000fffffffffff
+
+/* Immediate (IMM) descriptor format */
+#define IMM_TYPE				4
+#define IMM_DATA_SHIFT				0
+#define IMM_DATA_MASK				0x0fffffffffffffff
+
+/* Next pointer (NPTR) descriptor format */
+#define NPTR_TYPE				5
+#define NPTR_TOGGLE_SHIFT			58
+#define NPTR_TOGGLE_MASK			0x1
+#define NPTR_ADDR_SHIFT				0
+#define NPTR_ADDR_MASK				0x00000fffffffffff
+
+/* Mega source (MSRC) descriptor format */
+#define MSRC_TYPE				6
+#define MSRC_LENGTH_SHIFT			44
+#define MSRC_LENGTH_MASK			0xffff
+#define MSRC_ADDR_SHIFT				0
+#define MSRC_ADDR_MASK				0x00000fffffffffff
+
+/* Mega destination (MDST) descriptor format */
+#define MDST_TYPE				7
+#define MDST_LENGTH_SHIFT			44
+#define MDST_LENGTH_MASK			0xffff
+#define MDST_ADDR_SHIFT				0
+#define MDST_ADDR_MASK				0x00000fffffffffff
+
+/* Source with tlast (SRCT) descriptor format */
+#define SRCT_TYPE				8
+#define SRCT_LENGTH_SHIFT			44
+#define SRCT_LENGTH_MASK			0xffff
+#define SRCT_ADDR_SHIFT				0
+#define SRCT_ADDR_MASK				0x00000fffffffffff
+
+/* Destination with tlast (DSTT) descriptor format */
+#define DSTT_TYPE				9
+#define DSTT_LENGTH_SHIFT			44
+#define DSTT_LENGTH_MASK			0xffff
+#define DSTT_ADDR_SHIFT				0
+#define DSTT_ADDR_MASK				0x00000fffffffffff
+
+/* Immediate with tlast (IMMT) descriptor format */
+#define IMMT_TYPE				10
+#define IMMT_DATA_SHIFT				0
+#define IMMT_DATA_MASK				0x0fffffffffffffff
+
+/* Descriptor helper macros */
+#define DESC_DEC(_d, _s, _m)			(((_d) >> (_s)) & (_m))
+#define DESC_ENC(_d, _v, _s, _m)		\
+			do { \
+				(_d) &= ~((u64)(_m) << (_s)); \
+				(_d) |= (((u64)(_v) & (_m)) << (_s)); \
+			} while (0)
+
+/* ====== FlexRM data structures ===== */
+
+struct flexrm_ring {
+	/* Unprotected members */
+	int num;
+	struct flexrm_mbox *mbox;
+	void __iomem *regs;
+	bool irq_requested;
+	unsigned int irq;
+	unsigned int msi_timer_val;
+	unsigned int msi_count_threshold;
+	struct ida requests_ida;
+	struct brcm_message *requests[RING_MAX_REQ_COUNT];
+	void *bd_base;
+	dma_addr_t bd_dma_base;
+	u32 bd_write_offset;
+	void *cmpl_base;
+	dma_addr_t cmpl_dma_base;
+	/* Protected members */
+	spinlock_t lock;
+	struct brcm_message *last_pending_msg;
+	u32 cmpl_read_offset;
+};
+
+struct flexrm_mbox {
+	struct device *dev;
+	void __iomem *regs;
+	u32 num_rings;
+	struct flexrm_ring *rings;
+	struct dma_pool *bd_pool;
+	struct dma_pool *cmpl_pool;
+	struct mbox_controller controller;
+};
+
+/* ====== FlexRM ring descriptor helper routines ===== */
+
+static u64 flexrm_read_desc(void *desc_ptr)
+{
+	return le64_to_cpu(*((u64 *)desc_ptr));
+}
+
+static void flexrm_write_desc(void *desc_ptr, u64 desc)
+{
+	*((u64 *)desc_ptr) = cpu_to_le64(desc);
+}
+
+static u32 flexrm_cmpl_desc_to_reqid(u64 cmpl_desc)
+{
+	return (u32)(cmpl_desc & CMPL_OPAQUE_MASK);
+}
+
+static int flexrm_cmpl_desc_to_error(u64 cmpl_desc)
+{
+	u32 status;
+
+	status = DESC_DEC(cmpl_desc, CMPL_DME_STATUS_SHIFT,
+			  CMPL_DME_STATUS_MASK);
+	if (status & DME_STATUS_ERROR_MASK)
+		return -EIO;
+
+	status = DESC_DEC(cmpl_desc, CMPL_RM_STATUS_SHIFT,
+			  CMPL_RM_STATUS_MASK);
+	status &= RM_STATUS_CODE_MASK;
+	if (status == RM_STATUS_CODE_AE_TIMEOUT)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static bool flexrm_is_next_table_desc(void *desc_ptr)
+{
+	u64 desc = flexrm_read_desc(desc_ptr);
+	u32 type = DESC_DEC(desc, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+
+	return (type == NPTR_TYPE) ? true : false;
+}
+
+static u64 flexrm_next_table_desc(u32 toggle, dma_addr_t next_addr)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, NPTR_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, toggle, NPTR_TOGGLE_SHIFT, NPTR_TOGGLE_MASK);
+	DESC_ENC(desc, next_addr, NPTR_ADDR_SHIFT, NPTR_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_null_desc(u32 toggle)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, NULL_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, toggle, NULL_TOGGLE_SHIFT, NULL_TOGGLE_MASK);
+
+	return desc;
+}
+
+static u32 flexrm_estimate_header_desc_count(u32 nhcnt)
+{
+	u32 hcnt = nhcnt / HEADER_BDCOUNT_MAX;
+
+	if (!(nhcnt % HEADER_BDCOUNT_MAX))
+		hcnt += 1;
+
+	return hcnt;
+}
+
+static void flexrm_flip_header_toogle(void *desc_ptr)
+{
+	u64 desc = flexrm_read_desc(desc_ptr);
+
+	if (desc & ((u64)0x1 << HEADER_TOGGLE_SHIFT))
+		desc &= ~((u64)0x1 << HEADER_TOGGLE_SHIFT);
+	else
+		desc |= ((u64)0x1 << HEADER_TOGGLE_SHIFT);
+
+	flexrm_write_desc(desc_ptr, desc);
+}
+
+static u64 flexrm_header_desc(u32 toggle, u32 startpkt, u32 endpkt,
+			       u32 bdcount, u32 flags, u32 opaque)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, HEADER_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, toggle, HEADER_TOGGLE_SHIFT, HEADER_TOGGLE_MASK);
+	DESC_ENC(desc, startpkt, HEADER_STARTPKT_SHIFT, HEADER_STARTPKT_MASK);
+	DESC_ENC(desc, endpkt, HEADER_ENDPKT_SHIFT, HEADER_ENDPKT_MASK);
+	DESC_ENC(desc, bdcount, HEADER_BDCOUNT_SHIFT, HEADER_BDCOUNT_MASK);
+	DESC_ENC(desc, flags, HEADER_FLAGS_SHIFT, HEADER_FLAGS_MASK);
+	DESC_ENC(desc, opaque, HEADER_OPAQUE_SHIFT, HEADER_OPAQUE_MASK);
+
+	return desc;
+}
+
+static void flexrm_enqueue_desc(u32 nhpos, u32 nhcnt, u32 reqid,
+				 u64 desc, void **desc_ptr, u32 *toggle,
+				 void *start_desc, void *end_desc)
+{
+	u64 d;
+	u32 nhavail, _toggle, _startpkt, _endpkt, _bdcount;
+
+	/* Sanity check */
+	if (nhcnt <= nhpos)
+		return;
+
+	/*
+	 * Each request or packet start with a HEADER descriptor followed
+	 * by one or more non-HEADER descriptors (SRC, SRCT, MSRC, DST,
+	 * DSTT, MDST, IMM, and IMMT). The number of non-HEADER descriptors
+	 * following a HEADER descriptor is represented by BDCOUNT field
+	 * of HEADER descriptor. The max value of BDCOUNT field is 31 which
+	 * means we can only have 31 non-HEADER descriptors following one
+	 * HEADER descriptor.
+	 *
+	 * In general use, number of non-HEADER descriptors can easily go
+	 * beyond 31. To tackle this situation, we have packet (or request)
+	 * extenstion bits (STARTPKT and ENDPKT) in the HEADER descriptor.
+	 *
+	 * To use packet extension, the first HEADER descriptor of request
+	 * (or packet) will have STARTPKT=1 and ENDPKT=0. The intermediate
+	 * HEADER descriptors will have STARTPKT=0 and ENDPKT=0. The last
+	 * HEADER descriptor will have STARTPKT=0 and ENDPKT=1. Also, the
+	 * TOGGLE bit of the first HEADER will be set to invalid state to
+	 * ensure that FlexRM does not start fetching descriptors till all
+	 * descriptors are enqueued. The user of this function will flip
+	 * the TOGGLE bit of first HEADER after all descriptors are
+	 * enqueued.
+	 */
+
+	if ((nhpos % HEADER_BDCOUNT_MAX == 0) && (nhcnt - nhpos)) {
+		/* Prepare the header descriptor */
+		nhavail = (nhcnt - nhpos);
+		_toggle = (nhpos == 0) ? !(*toggle) : (*toggle);
+		_startpkt = (nhpos == 0) ? 0x1 : 0x0;
+		_endpkt = (nhavail <= HEADER_BDCOUNT_MAX) ? 0x1 : 0x0;
+		_bdcount = (nhavail <= HEADER_BDCOUNT_MAX) ?
+				nhavail : HEADER_BDCOUNT_MAX;
+		if (nhavail <= HEADER_BDCOUNT_MAX)
+			_bdcount = nhavail;
+		else
+			_bdcount = HEADER_BDCOUNT_MAX;
+		d = flexrm_header_desc(_toggle, _startpkt, _endpkt,
+					_bdcount, 0x0, reqid);
+
+		/* Write header descriptor */
+		flexrm_write_desc(*desc_ptr, d);
+
+		/* Point to next descriptor */
+		*desc_ptr += sizeof(desc);
+		if (*desc_ptr == end_desc)
+			*desc_ptr = start_desc;
+
+		/* Skip next pointer descriptors */
+		while (flexrm_is_next_table_desc(*desc_ptr)) {
+			*toggle = (*toggle) ? 0 : 1;
+			*desc_ptr += sizeof(desc);
+			if (*desc_ptr == end_desc)
+				*desc_ptr = start_desc;
+		}
+	}
+
+	/* Write desired descriptor */
+	flexrm_write_desc(*desc_ptr, desc);
+
+	/* Point to next descriptor */
+	*desc_ptr += sizeof(desc);
+	if (*desc_ptr == end_desc)
+		*desc_ptr = start_desc;
+
+	/* Skip next pointer descriptors */
+	while (flexrm_is_next_table_desc(*desc_ptr)) {
+		*toggle = (*toggle) ? 0 : 1;
+		*desc_ptr += sizeof(desc);
+		if (*desc_ptr == end_desc)
+			*desc_ptr = start_desc;
+	}
+}
+
+static u64 flexrm_src_desc(dma_addr_t addr, unsigned int length)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, SRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length, SRC_LENGTH_SHIFT, SRC_LENGTH_MASK);
+	DESC_ENC(desc, addr, SRC_ADDR_SHIFT, SRC_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_msrc_desc(dma_addr_t addr, unsigned int length_div_16)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, MSRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length_div_16, MSRC_LENGTH_SHIFT, MSRC_LENGTH_MASK);
+	DESC_ENC(desc, addr, MSRC_ADDR_SHIFT, MSRC_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_dst_desc(dma_addr_t addr, unsigned int length)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, DST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length, DST_LENGTH_SHIFT, DST_LENGTH_MASK);
+	DESC_ENC(desc, addr, DST_ADDR_SHIFT, DST_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_mdst_desc(dma_addr_t addr, unsigned int length_div_16)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, MDST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length_div_16, MDST_LENGTH_SHIFT, MDST_LENGTH_MASK);
+	DESC_ENC(desc, addr, MDST_ADDR_SHIFT, MDST_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_imm_desc(u64 data)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, IMM_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, data, IMM_DATA_SHIFT, IMM_DATA_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_srct_desc(dma_addr_t addr, unsigned int length)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, SRCT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length, SRCT_LENGTH_SHIFT, SRCT_LENGTH_MASK);
+	DESC_ENC(desc, addr, SRCT_ADDR_SHIFT, SRCT_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_dstt_desc(dma_addr_t addr, unsigned int length)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, DSTT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, length, DSTT_LENGTH_SHIFT, DSTT_LENGTH_MASK);
+	DESC_ENC(desc, addr, DSTT_ADDR_SHIFT, DSTT_ADDR_MASK);
+
+	return desc;
+}
+
+static u64 flexrm_immt_desc(u64 data)
+{
+	u64 desc = 0;
+
+	DESC_ENC(desc, IMMT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK);
+	DESC_ENC(desc, data, IMMT_DATA_SHIFT, IMMT_DATA_MASK);
+
+	return desc;
+}
+
+static bool flexrm_spu_sanity_check(struct brcm_message *msg)
+{
+	struct scatterlist *sg;
+
+	if (!msg->spu.src || !msg->spu.dst)
+		return false;
+	for (sg = msg->spu.src; sg; sg = sg_next(sg)) {
+		if (sg->length & 0xf) {
+			if (sg->length > SRC_LENGTH_MASK)
+				return false;
+		} else {
+			if (sg->length > (MSRC_LENGTH_MASK * 16))
+				return false;
+		}
+	}
+	for (sg = msg->spu.dst; sg; sg = sg_next(sg)) {
+		if (sg->length & 0xf) {
+			if (sg->length > DST_LENGTH_MASK)
+				return false;
+		} else {
+			if (sg->length > (MDST_LENGTH_MASK * 16))
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static u32 flexrm_spu_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+	u32 cnt = 0;
+	unsigned int dst_target = 0;
+	struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst;
+
+	while (src_sg || dst_sg) {
+		if (src_sg) {
+			cnt++;
+			dst_target = src_sg->length;
+			src_sg = sg_next(src_sg);
+		} else
+			dst_target = UINT_MAX;
+
+		while (dst_target && dst_sg) {
+			cnt++;
+			if (dst_sg->length < dst_target)
+				dst_target -= dst_sg->length;
+			else
+				dst_target = 0;
+			dst_sg = sg_next(dst_sg);
+		}
+	}
+
+	return cnt;
+}
+
+static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg)
+{
+	int rc;
+
+	rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+			DMA_TO_DEVICE);
+	if (rc < 0)
+		return rc;
+
+	rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst),
+			DMA_FROM_DEVICE);
+	if (rc < 0) {
+		dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+			     DMA_TO_DEVICE);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void flexrm_spu_dma_unmap(struct device *dev, struct brcm_message *msg)
+{
+	dma_unmap_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst),
+		     DMA_FROM_DEVICE);
+	dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src),
+		     DMA_TO_DEVICE);
+}
+
+static void *flexrm_spu_write_descs(struct brcm_message *msg, u32 nhcnt,
+				     u32 reqid, void *desc_ptr, u32 toggle,
+				     void *start_desc, void *end_desc)
+{
+	u64 d;
+	u32 nhpos = 0;
+	void *orig_desc_ptr = desc_ptr;
+	unsigned int dst_target = 0;
+	struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst;
+
+	while (src_sg || dst_sg) {
+		if (src_sg) {
+			if (sg_dma_len(src_sg) & 0xf)
+				d = flexrm_src_desc(sg_dma_address(src_sg),
+						     sg_dma_len(src_sg));
+			else
+				d = flexrm_msrc_desc(sg_dma_address(src_sg),
+						      sg_dma_len(src_sg)/16);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+			dst_target = sg_dma_len(src_sg);
+			src_sg = sg_next(src_sg);
+		} else
+			dst_target = UINT_MAX;
+
+		while (dst_target && dst_sg) {
+			if (sg_dma_len(dst_sg) & 0xf)
+				d = flexrm_dst_desc(sg_dma_address(dst_sg),
+						     sg_dma_len(dst_sg));
+			else
+				d = flexrm_mdst_desc(sg_dma_address(dst_sg),
+						      sg_dma_len(dst_sg)/16);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+			if (sg_dma_len(dst_sg) < dst_target)
+				dst_target -= sg_dma_len(dst_sg);
+			else
+				dst_target = 0;
+			dst_sg = sg_next(dst_sg);
+		}
+	}
+
+	/* Null descriptor with invalid toggle bit */
+	flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle));
+
+	/* Ensure that descriptors have been written to memory */
+	wmb();
+
+	/* Flip toggle bit in header */
+	flexrm_flip_header_toogle(orig_desc_ptr);
+
+	return desc_ptr;
+}
+
+static bool flexrm_sba_sanity_check(struct brcm_message *msg)
+{
+	u32 i;
+
+	if (!msg->sba.cmds || !msg->sba.cmds_count)
+		return false;
+
+	for (i = 0; i < msg->sba.cmds_count; i++) {
+		if (((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) ||
+		     (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C)) &&
+		    (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT))
+			return false;
+		if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) &&
+		    (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK))
+			return false;
+		if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C) &&
+		    (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK))
+			return false;
+		if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP) &&
+		    (msg->sba.cmds[i].resp_len > DSTT_LENGTH_MASK))
+			return false;
+		if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT) &&
+		    (msg->sba.cmds[i].data_len > DSTT_LENGTH_MASK))
+			return false;
+	}
+
+	return true;
+}
+
+static u32 flexrm_sba_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+	u32 i, cnt;
+
+	cnt = 0;
+	for (i = 0; i < msg->sba.cmds_count; i++) {
+		cnt++;
+
+		if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) ||
+		    (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C))
+			cnt++;
+
+		if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP)
+			cnt++;
+
+		if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT)
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static void *flexrm_sba_write_descs(struct brcm_message *msg, u32 nhcnt,
+				     u32 reqid, void *desc_ptr, u32 toggle,
+				     void *start_desc, void *end_desc)
+{
+	u64 d;
+	u32 i, nhpos = 0;
+	struct brcm_sba_command *c;
+	void *orig_desc_ptr = desc_ptr;
+
+	/* Convert SBA commands into descriptors */
+	for (i = 0; i < msg->sba.cmds_count; i++) {
+		c = &msg->sba.cmds[i];
+
+		if ((c->flags & BRCM_SBA_CMD_HAS_RESP) &&
+		    (c->flags & BRCM_SBA_CMD_HAS_OUTPUT)) {
+			/* Destination response descriptor */
+			d = flexrm_dst_desc(c->resp, c->resp_len);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		} else if (c->flags & BRCM_SBA_CMD_HAS_RESP) {
+			/* Destination response with tlast descriptor */
+			d = flexrm_dstt_desc(c->resp, c->resp_len);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		}
+
+		if (c->flags & BRCM_SBA_CMD_HAS_OUTPUT) {
+			/* Destination with tlast descriptor */
+			d = flexrm_dstt_desc(c->data, c->data_len);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		}
+
+		if (c->flags & BRCM_SBA_CMD_TYPE_B) {
+			/* Command as immediate descriptor */
+			d = flexrm_imm_desc(c->cmd);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		} else {
+			/* Command as immediate descriptor with tlast */
+			d = flexrm_immt_desc(c->cmd);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		}
+
+		if ((c->flags & BRCM_SBA_CMD_TYPE_B) ||
+		    (c->flags & BRCM_SBA_CMD_TYPE_C)) {
+			/* Source with tlast descriptor */
+			d = flexrm_srct_desc(c->data, c->data_len);
+			flexrm_enqueue_desc(nhpos, nhcnt, reqid,
+					     d, &desc_ptr, &toggle,
+					     start_desc, end_desc);
+			nhpos++;
+		}
+	}
+
+	/* Null descriptor with invalid toggle bit */
+	flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle));
+
+	/* Ensure that descriptors have been written to memory */
+	wmb();
+
+	/* Flip toggle bit in header */
+	flexrm_flip_header_toogle(orig_desc_ptr);
+
+	return desc_ptr;
+}
+
+static bool flexrm_sanity_check(struct brcm_message *msg)
+{
+	if (!msg)
+		return false;
+
+	switch (msg->type) {
+	case BRCM_MESSAGE_SPU:
+		return flexrm_spu_sanity_check(msg);
+	case BRCM_MESSAGE_SBA:
+		return flexrm_sba_sanity_check(msg);
+	default:
+		return false;
+	};
+}
+
+static u32 flexrm_estimate_nonheader_desc_count(struct brcm_message *msg)
+{
+	if (!msg)
+		return 0;
+
+	switch (msg->type) {
+	case BRCM_MESSAGE_SPU:
+		return flexrm_spu_estimate_nonheader_desc_count(msg);
+	case BRCM_MESSAGE_SBA:
+		return flexrm_sba_estimate_nonheader_desc_count(msg);
+	default:
+		return 0;
+	};
+}
+
+static int flexrm_dma_map(struct device *dev, struct brcm_message *msg)
+{
+	if (!dev || !msg)
+		return -EINVAL;
+
+	switch (msg->type) {
+	case BRCM_MESSAGE_SPU:
+		return flexrm_spu_dma_map(dev, msg);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static void flexrm_dma_unmap(struct device *dev, struct brcm_message *msg)
+{
+	if (!dev || !msg)
+		return;
+
+	switch (msg->type) {
+	case BRCM_MESSAGE_SPU:
+		flexrm_spu_dma_unmap(dev, msg);
+		break;
+	default:
+		break;
+	}
+}
+
+static void *flexrm_write_descs(struct brcm_message *msg, u32 nhcnt,
+				u32 reqid, void *desc_ptr, u32 toggle,
+				void *start_desc, void *end_desc)
+{
+	if (!msg || !desc_ptr || !start_desc || !end_desc)
+		return ERR_PTR(-ENOTSUPP);
+
+	if ((desc_ptr < start_desc) || (end_desc <= desc_ptr))
+		return ERR_PTR(-ERANGE);
+
+	switch (msg->type) {
+	case BRCM_MESSAGE_SPU:
+		return flexrm_spu_write_descs(msg, nhcnt, reqid,
+					       desc_ptr, toggle,
+					       start_desc, end_desc);
+	case BRCM_MESSAGE_SBA:
+		return flexrm_sba_write_descs(msg, nhcnt, reqid,
+					       desc_ptr, toggle,
+					       start_desc, end_desc);
+	default:
+		return ERR_PTR(-ENOTSUPP);
+	};
+}
+
+/* ====== FlexRM driver helper routines ===== */
+
+static int flexrm_new_request(struct flexrm_ring *ring,
+				struct brcm_message *batch_msg,
+				struct brcm_message *msg)
+{
+	void *next;
+	unsigned long flags;
+	u32 val, count, nhcnt;
+	u32 read_offset, write_offset;
+	bool exit_cleanup = false;
+	int ret = 0, reqid;
+
+	/* Do sanity check on message */
+	if (!flexrm_sanity_check(msg))
+		return -EIO;
+	msg->error = 0;
+
+	/* If no requests possible then save data pointer and goto done. */
+	reqid = ida_simple_get(&ring->requests_ida, 0,
+				RING_MAX_REQ_COUNT, GFP_KERNEL);
+	if (reqid < 0) {
+		spin_lock_irqsave(&ring->lock, flags);
+		if (batch_msg)
+			ring->last_pending_msg = batch_msg;
+		else
+			ring->last_pending_msg = msg;
+		spin_unlock_irqrestore(&ring->lock, flags);
+		return 0;
+	}
+	ring->requests[reqid] = msg;
+
+	/* Do DMA mappings for the message */
+	ret = flexrm_dma_map(ring->mbox->dev, msg);
+	if (ret < 0) {
+		ring->requests[reqid] = NULL;
+		ida_simple_remove(&ring->requests_ida, reqid);
+		return ret;
+	}
+
+	/* If last_pending_msg is already set then goto done with error */
+	spin_lock_irqsave(&ring->lock, flags);
+	if (ring->last_pending_msg)
+		ret = -ENOSPC;
+	spin_unlock_irqrestore(&ring->lock, flags);
+	if (ret < 0) {
+		dev_warn(ring->mbox->dev, "no space in ring %d\n", ring->num);
+		exit_cleanup = true;
+		goto exit;
+	}
+
+	/* Determine current HW BD read offset */
+	read_offset = readl_relaxed(ring->regs + RING_BD_READ_PTR);
+	val = readl_relaxed(ring->regs + RING_BD_START_ADDR);
+	read_offset *= RING_DESC_SIZE;
+	read_offset += (u32)(BD_START_ADDR_DECODE(val) - ring->bd_dma_base);
+
+	/*
+	 * Number required descriptors = number of non-header descriptors +
+	 *				 number of header descriptors +
+	 *				 1x null descriptor
+	 */
+	nhcnt = flexrm_estimate_nonheader_desc_count(msg);
+	count = flexrm_estimate_header_desc_count(nhcnt) + nhcnt + 1;
+
+	/* Check for available descriptor space. */
+	write_offset = ring->bd_write_offset;
+	while (count) {
+		if (!flexrm_is_next_table_desc(ring->bd_base + write_offset))
+			count--;
+		write_offset += RING_DESC_SIZE;
+		if (write_offset == RING_BD_SIZE)
+			write_offset = 0x0;
+		if (write_offset == read_offset)
+			break;
+	}
+	if (count) {
+		spin_lock_irqsave(&ring->lock, flags);
+		if (batch_msg)
+			ring->last_pending_msg = batch_msg;
+		else
+			ring->last_pending_msg = msg;
+		spin_unlock_irqrestore(&ring->lock, flags);
+		ret = 0;
+		exit_cleanup = true;
+		goto exit;
+	}
+
+	/* Write descriptors to ring */
+	next = flexrm_write_descs(msg, nhcnt, reqid,
+			ring->bd_base + ring->bd_write_offset,
+			RING_BD_TOGGLE_VALID(ring->bd_write_offset),
+			ring->bd_base, ring->bd_base + RING_BD_SIZE);
+	if (IS_ERR(next)) {
+		ret = PTR_ERR(next);
+		exit_cleanup = true;
+		goto exit;
+	}
+
+	/* Save ring BD write offset */
+	ring->bd_write_offset = (unsigned long)(next - ring->bd_base);
+
+exit:
+	/* Update error status in message */
+	msg->error = ret;
+
+	/* Cleanup if we failed */
+	if (exit_cleanup) {
+		flexrm_dma_unmap(ring->mbox->dev, msg);
+		ring->requests[reqid] = NULL;
+		ida_simple_remove(&ring->requests_ida, reqid);
+	}
+
+	return ret;
+}
+
+static int flexrm_process_completions(struct flexrm_ring *ring)
+{
+	u64 desc;
+	int err, count = 0;
+	unsigned long flags;
+	struct brcm_message *msg = NULL;
+	u32 reqid, cmpl_read_offset, cmpl_write_offset;
+	struct mbox_chan *chan = &ring->mbox->controller.chans[ring->num];
+
+	spin_lock_irqsave(&ring->lock, flags);
+
+	/* Check last_pending_msg */
+	if (ring->last_pending_msg) {
+		msg = ring->last_pending_msg;
+		ring->last_pending_msg = NULL;
+	}
+
+	/*
+	 * Get current completion read and write offset
+	 *
+	 * Note: We should read completion write pointer atleast once
+	 * after we get a MSI interrupt because HW maintains internal
+	 * MSI status which will allow next MSI interrupt only after
+	 * completion write pointer is read.
+	 */
+	cmpl_write_offset = readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR);
+	cmpl_write_offset *= RING_DESC_SIZE;
+	cmpl_read_offset = ring->cmpl_read_offset;
+	ring->cmpl_read_offset = cmpl_write_offset;
+
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	/* If last_pending_msg was set then queue it back */
+	if (msg)
+		mbox_send_message(chan, msg);
+
+	/* For each completed request notify mailbox clients */
+	reqid = 0;
+	while (cmpl_read_offset != cmpl_write_offset) {
+		/* Dequeue next completion descriptor */
+		desc = *((u64 *)(ring->cmpl_base + cmpl_read_offset));
+
+		/* Next read offset */
+		cmpl_read_offset += RING_DESC_SIZE;
+		if (cmpl_read_offset == RING_CMPL_SIZE)
+			cmpl_read_offset = 0;
+
+		/* Decode error from completion descriptor */
+		err = flexrm_cmpl_desc_to_error(desc);
+		if (err < 0) {
+			dev_warn(ring->mbox->dev,
+				 "got completion desc=0x%lx with error %d",
+				 (unsigned long)desc, err);
+		}
+
+		/* Determine request id from completion descriptor */
+		reqid = flexrm_cmpl_desc_to_reqid(desc);
+
+		/* Determine message pointer based on reqid */
+		msg = ring->requests[reqid];
+		if (!msg) {
+			dev_warn(ring->mbox->dev,
+				 "null msg pointer for completion desc=0x%lx",
+				 (unsigned long)desc);
+			continue;
+		}
+
+		/* Release reqid for recycling */
+		ring->requests[reqid] = NULL;
+		ida_simple_remove(&ring->requests_ida, reqid);
+
+		/* Unmap DMA mappings */
+		flexrm_dma_unmap(ring->mbox->dev, msg);
+
+		/* Give-back message to mailbox client */
+		msg->error = err;
+		mbox_chan_received_data(chan, msg);
+
+		/* Increment number of completions processed */
+		count++;
+	}
+
+	return count;
+}
+
+/* ====== FlexRM interrupt handler ===== */
+
+static irqreturn_t flexrm_irq_event(int irq, void *dev_id)
+{
+	/* We only have MSI for completions so just wakeup IRQ thread */
+	/* Ring related errors will be informed via completion descriptors */
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t flexrm_irq_thread(int irq, void *dev_id)
+{
+	flexrm_process_completions(dev_id);
+
+	return IRQ_HANDLED;
+}
+
+/* ====== FlexRM mailbox callbacks ===== */
+
+static int flexrm_send_data(struct mbox_chan *chan, void *data)
+{
+	int i, rc;
+	struct flexrm_ring *ring = chan->con_priv;
+	struct brcm_message *msg = data;
+
+	if (msg->type == BRCM_MESSAGE_BATCH) {
+		for (i = msg->batch.msgs_queued;
+		     i < msg->batch.msgs_count; i++) {
+			rc = flexrm_new_request(ring, msg,
+						 &msg->batch.msgs[i]);
+			if (rc) {
+				msg->error = rc;
+				return rc;
+			}
+			msg->batch.msgs_queued++;
+		}
+		return 0;
+	}
+
+	return flexrm_new_request(ring, NULL, data);
+}
+
+static bool flexrm_peek_data(struct mbox_chan *chan)
+{
+	int cnt = flexrm_process_completions(chan->con_priv);
+
+	return (cnt > 0) ? true : false;
+}
+
+static int flexrm_startup(struct mbox_chan *chan)
+{
+	u64 d;
+	u32 val, off;
+	int ret = 0;
+	dma_addr_t next_addr;
+	struct flexrm_ring *ring = chan->con_priv;
+
+	/* Allocate BD memory */
+	ring->bd_base = dma_pool_alloc(ring->mbox->bd_pool,
+				       GFP_KERNEL, &ring->bd_dma_base);
+	if (!ring->bd_base) {
+		dev_err(ring->mbox->dev, "can't allocate BD memory\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Configure next table pointer entries in BD memory */
+	for (off = 0; off < RING_BD_SIZE; off += RING_DESC_SIZE) {
+		next_addr = off + RING_DESC_SIZE;
+		if (next_addr == RING_BD_SIZE)
+			next_addr = 0;
+		next_addr += ring->bd_dma_base;
+		if (RING_BD_ALIGN_CHECK(next_addr))
+			d = flexrm_next_table_desc(RING_BD_TOGGLE_VALID(off),
+						    next_addr);
+		else
+			d = flexrm_null_desc(RING_BD_TOGGLE_INVALID(off));
+		flexrm_write_desc(ring->bd_base + off, d);
+	}
+
+	/* Allocate completion memory */
+	ring->cmpl_base = dma_pool_alloc(ring->mbox->cmpl_pool,
+					 GFP_KERNEL, &ring->cmpl_dma_base);
+	if (!ring->cmpl_base) {
+		dev_err(ring->mbox->dev, "can't allocate completion memory\n");
+		ret = -ENOMEM;
+		goto fail_free_bd_memory;
+	}
+	memset(ring->cmpl_base, 0, RING_CMPL_SIZE);
+
+	/* Request IRQ */
+	if (ring->irq == UINT_MAX) {
+		dev_err(ring->mbox->dev, "ring IRQ not available\n");
+		ret = -ENODEV;
+		goto fail_free_cmpl_memory;
+	}
+	ret = request_threaded_irq(ring->irq,
+				   flexrm_irq_event,
+				   flexrm_irq_thread,
+				   0, dev_name(ring->mbox->dev), ring);
+	if (ret) {
+		dev_err(ring->mbox->dev, "failed to request ring IRQ\n");
+		goto fail_free_cmpl_memory;
+	}
+	ring->irq_requested = true;
+
+	/* Disable/inactivate ring */
+	writel_relaxed(0x0, ring->regs + RING_CONTROL);
+
+	/* Program BD start address */
+	val = BD_START_ADDR_VALUE(ring->bd_dma_base);
+	writel_relaxed(val, ring->regs + RING_BD_START_ADDR);
+
+	/* BD write pointer will be same as HW write pointer */
+	ring->bd_write_offset =
+			readl_relaxed(ring->regs + RING_BD_WRITE_PTR);
+	ring->bd_write_offset *= RING_DESC_SIZE;
+
+	/* Program completion start address */
+	val = CMPL_START_ADDR_VALUE(ring->cmpl_dma_base);
+	writel_relaxed(val, ring->regs + RING_CMPL_START_ADDR);
+
+	/* Ensure last pending message is cleared */
+	ring->last_pending_msg = NULL;
+
+	/* Completion read pointer will be same as HW write pointer */
+	ring->cmpl_read_offset =
+			readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR);
+	ring->cmpl_read_offset *= RING_DESC_SIZE;
+
+	/* Read ring Tx, Rx, and Outstanding counts to clear */
+	readl_relaxed(ring->regs + RING_NUM_REQ_RECV_LS);
+	readl_relaxed(ring->regs + RING_NUM_REQ_RECV_MS);
+	readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_LS);
+	readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_MS);
+	readl_relaxed(ring->regs + RING_NUM_REQ_OUTSTAND);
+
+	/* Configure RING_MSI_CONTROL */
+	val = 0;
+	val |= (ring->msi_timer_val << MSI_TIMER_VAL_SHIFT);
+	val |= BIT(MSI_ENABLE_SHIFT);
+	val |= (ring->msi_count_threshold & MSI_COUNT_MASK) << MSI_COUNT_SHIFT;
+	writel_relaxed(val, ring->regs + RING_MSI_CONTROL);
+
+	/* Enable/activate ring */
+	val = BIT(CONTROL_ACTIVE_SHIFT);
+	writel_relaxed(val, ring->regs + RING_CONTROL);
+
+	return 0;
+
+fail_free_cmpl_memory:
+	dma_pool_free(ring->mbox->cmpl_pool,
+		      ring->cmpl_base, ring->cmpl_dma_base);
+	ring->cmpl_base = NULL;
+fail_free_bd_memory:
+	dma_pool_free(ring->mbox->bd_pool,
+		      ring->bd_base, ring->bd_dma_base);
+	ring->bd_base = NULL;
+fail:
+	return ret;
+}
+
+static void flexrm_shutdown(struct mbox_chan *chan)
+{
+	u32 reqid;
+	unsigned int timeout;
+	struct brcm_message *msg;
+	struct flexrm_ring *ring = chan->con_priv;
+
+	/* Disable/inactivate ring */
+	writel_relaxed(0x0, ring->regs + RING_CONTROL);
+
+	/* Flush ring with timeout of 1s */
+	timeout = 1000;
+	writel_relaxed(BIT(CONTROL_FLUSH_SHIFT),
+			ring->regs + RING_CONTROL);
+	do {
+		if (readl_relaxed(ring->regs + RING_FLUSH_DONE) &
+		    FLUSH_DONE_MASK)
+			break;
+		mdelay(1);
+	} while (timeout--);
+
+	/* Abort all in-flight requests */
+	for (reqid = 0; reqid < RING_MAX_REQ_COUNT; reqid++) {
+		msg = ring->requests[reqid];
+		if (!msg)
+			continue;
+
+		/* Release reqid for recycling */
+		ring->requests[reqid] = NULL;
+		ida_simple_remove(&ring->requests_ida, reqid);
+
+		/* Unmap DMA mappings */
+		flexrm_dma_unmap(ring->mbox->dev, msg);
+
+		/* Give-back message to mailbox client */
+		msg->error = -EIO;
+		mbox_chan_received_data(chan, msg);
+	}
+
+	/* Release IRQ */
+	if (ring->irq_requested) {
+		free_irq(ring->irq, ring);
+		ring->irq_requested = false;
+	}
+
+	/* Free-up completion descriptor ring */
+	if (ring->cmpl_base) {
+		dma_pool_free(ring->mbox->cmpl_pool,
+			      ring->cmpl_base, ring->cmpl_dma_base);
+		ring->cmpl_base = NULL;
+	}
+
+	/* Free-up BD descriptor ring */
+	if (ring->bd_base) {
+		dma_pool_free(ring->mbox->bd_pool,
+			      ring->bd_base, ring->bd_dma_base);
+		ring->bd_base = NULL;
+	}
+}
+
+static bool flexrm_last_tx_done(struct mbox_chan *chan)
+{
+	bool ret;
+	unsigned long flags;
+	struct flexrm_ring *ring = chan->con_priv;
+
+	spin_lock_irqsave(&ring->lock, flags);
+	ret = (ring->last_pending_msg) ? false : true;
+	spin_unlock_irqrestore(&ring->lock, flags);
+
+	return ret;
+}
+
+static const struct mbox_chan_ops flexrm_mbox_chan_ops = {
+	.send_data	= flexrm_send_data,
+	.startup	= flexrm_startup,
+	.shutdown	= flexrm_shutdown,
+	.last_tx_done	= flexrm_last_tx_done,
+	.peek_data	= flexrm_peek_data,
+};
+
+static struct mbox_chan *flexrm_mbox_of_xlate(struct mbox_controller *cntlr,
+					const struct of_phandle_args *pa)
+{
+	struct mbox_chan *chan;
+	struct flexrm_ring *ring;
+
+	if (pa->args_count < 3)
+		return ERR_PTR(-EINVAL);
+
+	if (pa->args[0] >= cntlr->num_chans)
+		return ERR_PTR(-ENOENT);
+
+	if (pa->args[1] > MSI_COUNT_MASK)
+		return ERR_PTR(-EINVAL);
+
+	if (pa->args[2] > MSI_TIMER_VAL_MASK)
+		return ERR_PTR(-EINVAL);
+
+	chan = &cntlr->chans[pa->args[0]];
+	ring = chan->con_priv;
+	ring->msi_count_threshold = pa->args[1];
+	ring->msi_timer_val = pa->args[2];
+
+	return chan;
+}
+
+/* ====== FlexRM platform driver ===== */
+
+static void flexrm_mbox_msi_write(struct msi_desc *desc, struct msi_msg *msg)
+{
+	struct device *dev = msi_desc_to_dev(desc);
+	struct flexrm_mbox *mbox = dev_get_drvdata(dev);
+	struct flexrm_ring *ring = &mbox->rings[desc->platform.msi_index];
+
+	/* Configure per-Ring MSI registers */
+	writel_relaxed(msg->address_lo, ring->regs + RING_MSI_ADDR_LS);
+	writel_relaxed(msg->address_hi, ring->regs + RING_MSI_ADDR_MS);
+	writel_relaxed(msg->data, ring->regs + RING_MSI_DATA_VALUE);
+}
+
+static int flexrm_mbox_probe(struct platform_device *pdev)
+{
+	int index, ret = 0;
+	void __iomem *regs;
+	void __iomem *regs_end;
+	struct msi_desc *desc;
+	struct resource *iomem;
+	struct flexrm_ring *ring;
+	struct flexrm_mbox *mbox;
+	struct device *dev = &pdev->dev;
+
+	/* Allocate driver mailbox struct */
+	mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL);
+	if (!mbox) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	mbox->dev = dev;
+	platform_set_drvdata(pdev, mbox);
+
+	/* Get resource for registers */
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	/* Map registers of all rings */
+	mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
+	if (IS_ERR(mbox->regs)) {
+		ret = PTR_ERR(mbox->regs);
+		dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret);
+		goto fail;
+	}
+	regs_end = mbox->regs + resource_size(iomem);
+
+	/* Scan and count available rings */
+	mbox->num_rings = 0;
+	for (regs = mbox->regs; regs < regs_end; regs += RING_REGS_SIZE) {
+		if (readl_relaxed(regs + RING_VER) == RING_VER_MAGIC)
+			mbox->num_rings++;
+	}
+	if (!mbox->num_rings) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	/* Allocate driver ring structs */
+	ring = devm_kcalloc(dev, mbox->num_rings, sizeof(*ring), GFP_KERNEL);
+	if (!ring) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	mbox->rings = ring;
+
+	/* Initialize members of driver ring structs */
+	regs = mbox->regs;
+	for (index = 0; index < mbox->num_rings; index++) {
+		ring = &mbox->rings[index];
+		ring->num = index;
+		ring->mbox = mbox;
+		while ((regs < regs_end) &&
+		       (readl_relaxed(regs + RING_VER) != RING_VER_MAGIC))
+			regs += RING_REGS_SIZE;
+		if (regs_end <= regs) {
+			ret = -ENODEV;
+			goto fail;
+		}
+		ring->regs = regs;
+		regs += RING_REGS_SIZE;
+		ring->irq = UINT_MAX;
+		ring->irq_requested = false;
+		ring->msi_timer_val = MSI_TIMER_VAL_MASK;
+		ring->msi_count_threshold = 0x1;
+		ida_init(&ring->requests_ida);
+		memset(ring->requests, 0, sizeof(ring->requests));
+		ring->bd_base = NULL;
+		ring->bd_dma_base = 0;
+		ring->cmpl_base = NULL;
+		ring->cmpl_dma_base = 0;
+		spin_lock_init(&ring->lock);
+		ring->last_pending_msg = NULL;
+		ring->cmpl_read_offset = 0;
+	}
+
+	/* FlexRM is capable of 40-bit physical addresses only */
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret)
+			goto fail;
+	}
+
+	/* Create DMA pool for ring BD memory */
+	mbox->bd_pool = dma_pool_create("bd", dev, RING_BD_SIZE,
+					1 << RING_BD_ALIGN_ORDER, 0);
+	if (!mbox->bd_pool) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Create DMA pool for ring completion memory */
+	mbox->cmpl_pool = dma_pool_create("cmpl", dev, RING_CMPL_SIZE,
+					  1 << RING_CMPL_ALIGN_ORDER, 0);
+	if (!mbox->cmpl_pool) {
+		ret = -ENOMEM;
+		goto fail_destroy_bd_pool;
+	}
+
+	/* Allocate platform MSIs for each ring */
+	ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings,
+						flexrm_mbox_msi_write);
+	if (ret)
+		goto fail_destroy_cmpl_pool;
+
+	/* Save alloced IRQ numbers for each ring */
+	for_each_msi_entry(desc, dev) {
+		ring = &mbox->rings[desc->platform.msi_index];
+		ring->irq = desc->irq;
+	}
+
+	/* Initialize mailbox controller */
+	mbox->controller.txdone_irq = false;
+	mbox->controller.txdone_poll = true;
+	mbox->controller.txpoll_period = 1;
+	mbox->controller.ops = &flexrm_mbox_chan_ops;
+	mbox->controller.dev = dev;
+	mbox->controller.num_chans = mbox->num_rings;
+	mbox->controller.of_xlate = flexrm_mbox_of_xlate;
+	mbox->controller.chans = devm_kcalloc(dev, mbox->num_rings,
+				sizeof(*mbox->controller.chans), GFP_KERNEL);
+	if (!mbox->controller.chans) {
+		ret = -ENOMEM;
+		goto fail_free_msis;
+	}
+	for (index = 0; index < mbox->num_rings; index++)
+		mbox->controller.chans[index].con_priv = &mbox->rings[index];
+
+	/* Register mailbox controller */
+	ret = mbox_controller_register(&mbox->controller);
+	if (ret)
+		goto fail_free_msis;
+
+	dev_info(dev, "registered flexrm mailbox with %d channels\n",
+			mbox->controller.num_chans);
+
+	return 0;
+
+fail_free_msis:
+	platform_msi_domain_free_irqs(dev);
+fail_destroy_cmpl_pool:
+	dma_pool_destroy(mbox->cmpl_pool);
+fail_destroy_bd_pool:
+	dma_pool_destroy(mbox->bd_pool);
+fail:
+	return ret;
+}
+
+static int flexrm_mbox_remove(struct platform_device *pdev)
+{
+	int index;
+	struct device *dev = &pdev->dev;
+	struct flexrm_ring *ring;
+	struct flexrm_mbox *mbox = platform_get_drvdata(pdev);
+
+	mbox_controller_unregister(&mbox->controller);
+
+	platform_msi_domain_free_irqs(dev);
+
+	dma_pool_destroy(mbox->cmpl_pool);
+	dma_pool_destroy(mbox->bd_pool);
+
+	for (index = 0; index < mbox->num_rings; index++) {
+		ring = &mbox->rings[index];
+		ida_destroy(&ring->requests_ida);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id flexrm_mbox_of_match[] = {
+	{ .compatible = "brcm,iproc-flexrm-mbox", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, flexrm_mbox_of_match);
+
+static struct platform_driver flexrm_mbox_driver = {
+	.driver = {
+		.name = "brcm-flexrm-mbox",
+		.of_match_table = flexrm_mbox_of_match,
+	},
+	.probe		= flexrm_mbox_probe,
+	.remove		= flexrm_mbox_remove,
+};
+module_platform_driver(flexrm_mbox_driver);
+
+MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom FlexRM mailbox driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c
index 2aeb034d5fb9..4fe7be0bdd11 100644
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -18,7 +18,8 @@
  * Broadcom PDC Mailbox Driver
  * The PDC provides a ring based programming interface to one or more hardware
  * offload engines. For example, the PDC driver works with both SPU-M and SPU2
- * cryptographic offload hardware. In some chips the PDC is referred to as MDE.
+ * cryptographic offload hardware. In some chips the PDC is referred to as MDE,
+ * and in others the FA2/FA+ hardware is used with this PDC driver.
  *
  * The PDC driver registers with the Linux mailbox framework as a mailbox
  * controller, once for each PDC instance. Ring 0 for each PDC is registered as
@@ -108,6 +109,7 @@
 #define PDC_INTMASK_OFFSET   0x24
 #define PDC_INTSTATUS_OFFSET 0x20
 #define PDC_RCVLAZY0_OFFSET  (0x30 + 4 * PDC_RINGSET)
+#define FA_RCVLAZY0_OFFSET   0x100
 
 /*
  * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata
@@ -162,6 +164,11 @@
 /* Maximum size buffer the DMA engine can handle */
 #define PDC_DMA_BUF_MAX 16384
 
+enum pdc_hw {
+	FA_HW,		/* FA2/FA+ hardware (i.e. Northstar Plus) */
+	PDC_HW		/* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */
+};
+
 struct pdc_dma_map {
 	void *ctx;          /* opaque context associated with frame */
 };
@@ -211,13 +218,13 @@ struct pdc_regs {
 	u32  gptimer;                /* 0x028 */
 
 	u32  PAD;
-	u32  intrcvlazy_0;           /* 0x030 */
-	u32  intrcvlazy_1;           /* 0x034 */
-	u32  intrcvlazy_2;           /* 0x038 */
-	u32  intrcvlazy_3;           /* 0x03c */
+	u32  intrcvlazy_0;           /* 0x030 (Only in PDC, not FA2) */
+	u32  intrcvlazy_1;           /* 0x034 (Only in PDC, not FA2) */
+	u32  intrcvlazy_2;           /* 0x038 (Only in PDC, not FA2) */
+	u32  intrcvlazy_3;           /* 0x03c (Only in PDC, not FA2) */
 
 	u32  PAD[48];
-	u32  removed_intrecvlazy;    /* 0x100 */
+	u32  fa_intrecvlazy;         /* 0x100 (Only in FA2, not PDC) */
 	u32  flowctlthresh;          /* 0x104 */
 	u32  wrrthresh;              /* 0x108 */
 	u32  gmac_idle_cnt_thresh;   /* 0x10c */
@@ -243,7 +250,7 @@ struct pdc_regs {
 	u32  serdes_status1;         /* 0x1b0 */
 	u32  PAD[11];                /* 0x1b4-1dc */
 	u32  clk_ctl_st;             /* 0x1e0 */
-	u32  hw_war;                 /* 0x1e4 */
+	u32  hw_war;                 /* 0x1e4 (Only in PDC, not FA2) */
 	u32  pwrctl;                 /* 0x1e8 */
 	u32  PAD[5];
 
@@ -410,6 +417,9 @@ struct pdc_state {
 	u32  txnobuf;          /* unable to create tx descriptor */
 	u32  rxnobuf;          /* unable to create rx descriptor */
 	u32  rx_oflow;         /* count of rx overflows */
+
+	/* hardware type - FA2 or PDC/MDE */
+	enum pdc_hw hw_type;
 };
 
 /* Global variables */
@@ -1396,7 +1406,13 @@ static int pdc_interrupts_init(struct pdc_state *pdcs)
 
 	/* interrupt configuration */
 	iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
-	iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + PDC_RCVLAZY0_OFFSET);
+
+	if (pdcs->hw_type == FA_HW)
+		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
+			  FA_RCVLAZY0_OFFSET);
+	else
+		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
+			  PDC_RCVLAZY0_OFFSET);
 
 	/* read irq from device tree */
 	pdcs->pdc_irq = irq_of_parse_and_map(dn, 0);
@@ -1465,6 +1481,17 @@ static int pdc_mb_init(struct pdc_state *pdcs)
 	return 0;
 }
 
+/* Device tree API */
+static const int pdc_hw = PDC_HW;
+static const int fa_hw = FA_HW;
+
+static const struct of_device_id pdc_mbox_of_match[] = {
+	{.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw},
+	{.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
+
 /**
  * pdc_dt_read() - Read application-specific data from device tree.
  * @pdev:  Platform device
@@ -1481,6 +1508,8 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *dn = pdev->dev.of_node;
+	const struct of_device_id *match;
+	const int *hw_type;
 	int err;
 
 	err = of_property_read_u32(dn, "brcm,rx-status-len",
@@ -1492,6 +1521,14 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
 
 	pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr");
 
+	pdcs->hw_type = PDC_HW;
+
+	match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev);
+	if (match != NULL) {
+		hw_type = match->data;
+		pdcs->hw_type = *hw_type;
+	}
+
 	return 0;
 }
 
@@ -1525,7 +1562,7 @@ static int pdc_probe(struct platform_device *pdev)
 	pdcs->pdc_idx = pdcg.num_spu;
 	pdcg.num_spu++;
 
-	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
 	if (err) {
 		dev_warn(dev, "PDC device cannot perform DMA. Error %d.", err);
 		goto cleanup;
@@ -1611,12 +1648,6 @@ static int pdc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id pdc_mbox_of_match[] = {
-	{.compatible = "brcm,iproc-pdc-mbox"},
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
-
 static struct platform_driver pdc_mbox_driver = {
 	.probe = pdc_probe,
 	.remove = pdc_remove,
diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c
index 613722db5daf..519376d3534c 100644
--- a/drivers/mailbox/hi6220-mailbox.c
+++ b/drivers/mailbox/hi6220-mailbox.c
@@ -221,7 +221,7 @@ static void hi6220_mbox_shutdown(struct mbox_chan *chan)
 	mbox->irq_map_chan[mchan->ack_irq] = NULL;
 }
 
-static struct mbox_chan_ops hi6220_mbox_ops = {
+static const struct mbox_chan_ops hi6220_mbox_ops = {
 	.send_data    = hi6220_mbox_send_data,
 	.startup      = hi6220_mbox_startup,
 	.shutdown     = hi6220_mbox_shutdown,
diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c
index dd2afbca51c9..a7040163dd43 100644
--- a/drivers/mailbox/mailbox-xgene-slimpro.c
+++ b/drivers/mailbox/mailbox-xgene-slimpro.c
@@ -174,7 +174,7 @@ static void slimpro_mbox_shutdown(struct mbox_chan *chan)
 	devm_free_irq(mb_chan->dev, mb_chan->irq, mb_chan);
 }
 
-static struct mbox_chan_ops slimpro_mbox_ops = {
+static const struct mbox_chan_ops slimpro_mbox_ops = {
 	.send_data = slimpro_mbox_send_data,
 	.startup = slimpro_mbox_startup,
 	.shutdown = slimpro_mbox_shutdown,
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 4671f8a12872..9dfbf7ea10a2 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -103,11 +103,14 @@ static void tx_tick(struct mbox_chan *chan, int r)
 	/* Submit next message */
 	msg_submit(chan);
 
+	if (!mssg)
+		return;
+
 	/* Notify the client */
-	if (mssg && chan->cl->tx_done)
+	if (chan->cl->tx_done)
 		chan->cl->tx_done(chan->cl, mssg, r);
 
-	if (chan->cl->tx_block)
+	if (r != -ETIME && chan->cl->tx_block)
 		complete(&chan->tx_complete);
 }
 
@@ -260,7 +263,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
 
 	msg_submit(chan);
 
-	if (chan->cl->tx_block && chan->active_req) {
+	if (chan->cl->tx_block) {
 		unsigned long wait;
 		int ret;
 
@@ -271,8 +274,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
 
 		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
 		if (ret == 0) {
-			t = -EIO;
-			tx_tick(chan, -EIO);
+			t = -ETIME;
+			tx_tick(chan, t);
 		}
 	}
 
@@ -453,6 +456,12 @@ int mbox_controller_register(struct mbox_controller *mbox)
 		txdone = TXDONE_BY_ACK;
 
 	if (txdone == TXDONE_BY_POLL) {
+
+		if (!mbox->ops->last_tx_done) {
+			dev_err(mbox->dev, "last_tx_done method is absent\n");
+			return -EINVAL;
+		}
+
 		hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC,
 			     HRTIMER_MODE_REL);
 		mbox->poll_hrt.function = txdone_hrtimer;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9c689b34e6e7..975922c8f231 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2773,7 +2773,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	ti->num_discard_bios = 1;
 	ti->discards_supported = true;
-	ti->discard_zeroes_data_unsupported = true;
 	ti->split_discard_bios = false;
 
 	cache->features = ca->features;
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 136fda3ff9e5..fea5bd52ada8 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -132,6 +132,7 @@ void dm_init_md_queue(struct mapped_device *md);
 void dm_init_normal_md_queue(struct mapped_device *md);
 int md_in_flight(struct mapped_device *md);
 void disable_write_same(struct mapped_device *md);
+void disable_write_zeroes(struct mapped_device *md);
 
 static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
 {
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 389a3637ffcc..ef1d836bd81b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2030,7 +2030,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	wake_up_process(cc->write_thread);
 
 	ti->num_flush_bios = 1;
-	ti->discard_zeroes_data_unsupported = true;
 
 	return 0;
 
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 03940bf36f6c..3702e502466d 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -312,9 +312,12 @@ static void do_region(int op, int op_flags, unsigned region,
 	 */
 	if (op == REQ_OP_DISCARD)
 		special_cmd_max_sectors = q->limits.max_discard_sectors;
+	else if (op == REQ_OP_WRITE_ZEROES)
+		special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
 	else if (op == REQ_OP_WRITE_SAME)
 		special_cmd_max_sectors = q->limits.max_write_same_sectors;
-	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) &&
+	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
+	     op == REQ_OP_WRITE_SAME)  &&
 	    special_cmd_max_sectors == 0) {
 		dec_count(io, region, -EOPNOTSUPP);
 		return;
@@ -328,11 +331,18 @@ static void do_region(int op, int op_flags, unsigned region,
 		/*
 		 * Allocate a suitably sized-bio.
 		 */
-		if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME))
+		switch (op) {
+		case REQ_OP_DISCARD:
+		case REQ_OP_WRITE_ZEROES:
+			num_bvecs = 0;
+			break;
+		case REQ_OP_WRITE_SAME:
 			num_bvecs = 1;
-		else
+			break;
+		default:
 			num_bvecs = min_t(int, BIO_MAX_PAGES,
 					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+		}
 
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
 		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
@@ -341,7 +351,7 @@ static void do_region(int op, int op_flags, unsigned region,
 		bio_set_op_attrs(bio, op, op_flags);
 		store_io_and_region_in_bio(bio, io, region);
 
-		if (op == REQ_OP_DISCARD) {
+		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
 			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
 			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
 			remaining -= num_sectors;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9e9d04cb7d51..f85846741d50 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -733,11 +733,11 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		job->pages = &zero_page_list;
 
 		/*
-		 * Use WRITE SAME to optimize zeroing if all dests support it.
+		 * Use WRITE ZEROES to optimize zeroing if all dests support it.
 		 */
-		job->rw = REQ_OP_WRITE_SAME;
+		job->rw = REQ_OP_WRITE_ZEROES;
 		for (i = 0; i < job->num_dests; i++)
-			if (!bdev_write_same(job->dests[i].bdev)) {
+			if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
 				job->rw = WRITE;
 				break;
 			}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 4788b0b989a9..e17fd44ceef5 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->num_flush_bios = 1;
 	ti->num_discard_bios = 1;
 	ti->num_write_same_bios = 1;
+	ti->num_write_zeroes_bios = 1;
 	ti->private = lc;
 	return 0;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7f223dbed49f..2950b145443d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1103,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->num_flush_bios = 1;
 	ti->num_discard_bios = 1;
 	ti->num_write_same_bios = 1;
+	ti->num_write_zeroes_bios = 1;
 	if (m->queue_mode == DM_TYPE_BIO_BASED)
 		ti->per_io_data_size = multipath_per_bio_data_size();
 	else
@@ -1491,7 +1492,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	 */
 	int r = DM_ENDIO_REQUEUE;
 
-	if (!error && !clone->errors)
+	if (!error)
 		return 0;	/* I/O complete */
 
 	if (noretry_error(error))
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1e217ba84d09..2dae3e5b851c 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2813,7 +2813,9 @@ static void configure_discard_support(struct raid_set *rs)
 	/* Assume discards not supported until after checks below. */
 	ti->discards_supported = false;
 
-	/* RAID level 4,5,6 require discard_zeroes_data for data integrity! */
+	/*
+	 * XXX: RAID level 4,5,6 require zeroing for safety.
+	 */
 	raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6);
 
 	for (i = 0; i < rs->raid_disks; i++) {
@@ -2827,8 +2829,6 @@ static void configure_discard_support(struct raid_set *rs)
 			return;
 
 		if (raid456) {
-			if (!q->limits.discard_zeroes_data)
-				return;
 			if (!devices_handle_discard_safely) {
 				DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
 				DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 2ddc2d20e62d..a95cbb80fb34 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1124,7 +1124,6 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->num_flush_bios = 1;
 	ti->num_discard_bios = 1;
 	ti->per_io_data_size = sizeof(struct dm_raid1_bio_record);
-	ti->discard_zeroes_data_unsupported = true;
 
 	ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
 	if (!ms->kmirrord_wq) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 0b081d170087..bff7e3bdb4ed 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -298,9 +298,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
-	if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
-		     !clone->q->limits.max_write_same_sectors))
-		disable_write_same(tio->md);
+	if (unlikely(r == -EREMOTEIO)) {
+		if (req_op(clone) == REQ_OP_WRITE_SAME &&
+		    !clone->q->limits.max_write_same_sectors)
+			disable_write_same(tio->md);
+		if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+		    !clone->q->limits.max_write_zeroes_sectors)
+			disable_write_zeroes(tio->md);
+	}
 
 	if (r <= 0)
 		/* The target wants to complete the I/O */
@@ -358,7 +363,7 @@ static void dm_complete_request(struct request *rq, int error)
 	if (!rq->q->mq_ops)
 		blk_complete_request(rq);
 	else
-		blk_mq_complete_request(rq, error);
+		blk_mq_complete_request(rq);
 }
 
 /*
@@ -762,7 +767,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static struct blk_mq_ops dm_mq_ops = {
+static const struct blk_mq_ops dm_mq_ops = {
 	.queue_rq = dm_mq_queue_rq,
 	.complete = dm_softirq_done,
 	.init_request = dm_mq_init_request,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 28193a57bf47..5ef49c121d99 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->num_flush_bios = stripes;
 	ti->num_discard_bios = stripes;
 	ti->num_write_same_bios = stripes;
+	ti->num_write_zeroes_bios = stripes;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
@@ -293,6 +294,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_REMAPPED;
 	}
 	if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
+	    unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
 	    unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
 		target_bio_nr = dm_bio_get_target_bio_nr(bio);
 		BUG_ON(target_bio_nr >= sc->stripes);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3ad16d9c9d5a..958275aca008 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1449,22 +1449,6 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
 	return false;
 }
 
-static bool dm_table_discard_zeroes_data(struct dm_table *t)
-{
-	struct dm_target *ti;
-	unsigned i = 0;
-
-	/* Ensure that all targets supports discard_zeroes_data. */
-	while (i < dm_table_get_num_targets(t)) {
-		ti = dm_table_get_target(t, i++);
-
-		if (ti->discard_zeroes_data_unsupported)
-			return false;
-	}
-
-	return true;
-}
-
 static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
 			    sector_t start, sector_t len, void *data)
 {
@@ -1533,6 +1517,34 @@ static bool dm_table_supports_write_same(struct dm_table *t)
 	return true;
 }
 
+static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
+					   sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !q->limits.max_write_zeroes_sectors;
+}
+
+static bool dm_table_supports_write_zeroes(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_write_zeroes_bios)
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+
 static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
 				  sector_t start, sector_t len, void *data)
 {
@@ -1592,9 +1604,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_write_cache(q, wc, fua);
 
-	if (!dm_table_discard_zeroes_data(t))
-		q->limits.discard_zeroes_data = 0;
-
 	/* Ensure that all underlying devices are non-rotational. */
 	if (dm_table_all_devices_attribute(t, device_is_nonrot))
 		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1603,6 +1612,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 
 	if (!dm_table_supports_write_same(t))
 		q->limits.max_write_same_sectors = 0;
+	if (!dm_table_supports_write_zeroes(t))
+		q->limits.max_write_zeroes_sectors = 0;
 
 	if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
 		queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 2b266a2b5035..a5f1916f621a 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3263,7 +3263,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * them down to the data device.  The thin device's discard
 	 * processing will cause mappings to be removed from the btree.
 	 */
-	ti->discard_zeroes_data_unsupported = true;
 	if (pf.discard_enabled && pf.discard_passdown) {
 		ti->num_discard_bios = 1;
 
@@ -4119,7 +4118,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
-	ti->discard_zeroes_data_unsupported = true;
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dfb75979e455..8bf397729bbd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -810,7 +810,6 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			trace_block_bio_complete(md->queue, bio, io_error);
 			bio->bi_error = io_error;
 			bio_endio(bio);
 		}
@@ -825,6 +824,14 @@ void disable_write_same(struct mapped_device *md)
 	limits->max_write_same_sectors = 0;
 }
 
+void disable_write_zeroes(struct mapped_device *md)
+{
+	struct queue_limits *limits = dm_get_queue_limits(md);
+
+	/* device doesn't really support WRITE ZEROES, disable it */
+	limits->max_write_zeroes_sectors = 0;
+}
+
 static void clone_endio(struct bio *bio)
 {
 	int error = bio->bi_error;
@@ -851,9 +858,14 @@ static void clone_endio(struct bio *bio)
 		}
 	}
 
-	if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) &&
-		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
-		disable_write_same(md);
+	if (unlikely(r == -EREMOTEIO)) {
+		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+			disable_write_same(md);
+		if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+			disable_write_zeroes(md);
+	}
 
 	free_tio(tio);
 	dec_pending(io, error);
@@ -1202,6 +1214,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti)
 	return ti->num_write_same_bios;
 }
 
+static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
+{
+	return ti->num_write_zeroes_bios;
+}
+
 typedef bool (*is_split_required_fn)(struct dm_target *ti);
 
 static bool is_split_required_for_discard(struct dm_target *ti)
@@ -1256,6 +1273,11 @@ static int __send_write_same(struct clone_info *ci)
 	return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
 }
 
+static int __send_write_zeroes(struct clone_info *ci)
+{
+	return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL);
+}
+
 /*
  * Select the correct strategy for processing a non-flush bio.
  */
@@ -1270,6 +1292,8 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 		return __send_discard(ci);
 	else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
 		return __send_write_same(ci);
+	else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
+		return __send_write_zeroes(ci);
 
 	ti = dm_table_find_target(ci->map, ci->sector);
 	if (!dm_target_is_valid(ti))
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3e38e0207a3e..377a8a3672e3 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -293,6 +293,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
 						      split, disk_devt(mddev->gendisk),
 						      bio_sector);
 			mddev_check_writesame(mddev, split);
+			mddev_check_write_zeroes(mddev, split);
 			generic_make_request(split);
 		}
 	} while (split != bio);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index dde8ecb760c8..1e76d64ce180 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -709,4 +709,11 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
 	    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
 		mddev->queue->limits.max_write_same_sectors = 0;
 }
+
+static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
+{
+	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+	    !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+		mddev->queue->limits.max_write_zeroes_sectors = 0;
+}
 #endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 79a12b59250b..e95d521d93e9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -139,6 +139,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	mp_bh->bio.bi_end_io = multipath_end_request;
 	mp_bh->bio.bi_private = mp_bh;
 	mddev_check_writesame(mddev, &mp_bh->bio);
+	mddev_check_write_zeroes(mddev, &mp_bh->bio);
 	generic_make_request(&mp_bh->bio);
 	return;
 }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 93347ca7c7a6..ce7a6a56cf73 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -383,6 +383,7 @@ static int raid0_run(struct mddev *mddev)
 
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
@@ -504,6 +505,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 						      split, disk_devt(mddev->gendisk),
 						      bio_sector);
 			mddev_check_writesame(mddev, split);
+			mddev_check_write_zeroes(mddev, split);
 			generic_make_request(split);
 		}
 	} while (split != bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a34f58772022..b59cc100320a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3177,8 +3177,10 @@ static int raid1_run(struct mddev *mddev)
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);
 
-	if (mddev->queue)
+	if (mddev->queue) {
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
+		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
+	}
 
 	rdev_for_each(rdev, mddev) {
 		if (!mddev->gendisk)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e89a8d78a9ed..28ec3a93acee 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3749,6 +3749,7 @@ static int raid10_run(struct mddev *mddev)
 		blk_queue_max_discard_sectors(mddev->queue,
 					      mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
+		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ed5cd705b985..2efdb0d67460 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5031,8 +5031,6 @@ static void raid5_align_endio(struct bio *bi)
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error) {
-		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
-					 raid_bi, 0);
 		bio_endio(raid_bi);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_quiescent);
@@ -7229,7 +7227,6 @@ static int raid5_run(struct mddev *mddev)
 
 	if (mddev->queue) {
 		int chunk_size;
-		bool discard_supported = true;
 		/* read-ahead size must cover two whole stripes, which
 		 * is 2 * (datadisks) * chunksize where 'n' is the
 		 * number of raid devices
@@ -7265,48 +7262,32 @@ static int raid5_run(struct mddev *mddev)
 		blk_queue_max_discard_sectors(mddev->queue,
 					      0xfffe * STRIPE_SECTORS);
 
-		/*
-		 * unaligned part of discard request will be ignored, so can't
-		 * guarantee discard_zeroes_data
-		 */
-		mddev->queue->limits.discard_zeroes_data = 0;
-
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
+		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
 
 		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->new_data_offset << 9);
-			/*
-			 * discard_zeroes_data is required, otherwise data
-			 * could be lost. Consider a scenario: discard a stripe
-			 * (the stripe could be inconsistent if
-			 * discard_zeroes_data is 0); write one disk of the
-			 * stripe (the stripe could be inconsistent again
-			 * depending on which disks are used to calculate
-			 * parity); the disk is broken; The stripe data of this
-			 * disk is lost.
-			 */
-			if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
-			    !bdev_get_queue(rdev->bdev)->
-						limits.discard_zeroes_data)
-				discard_supported = false;
-			/* Unfortunately, discard_zeroes_data is not currently
-			 * a guarantee - just a hint.  So we only allow DISCARD
-			 * if the sysadmin has confirmed that only safe devices
-			 * are in use by setting a module parameter.
-			 */
-			if (!devices_handle_discard_safely) {
-				if (discard_supported) {
-					pr_info("md/raid456: discard support disabled due to uncertainty.\n");
-					pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n");
-				}
-				discard_supported = false;
-			}
 		}
 
-		if (discard_supported &&
+		/*
+		 * zeroing is required, otherwise data
+		 * could be lost. Consider a scenario: discard a stripe
+		 * (the stripe could be inconsistent if
+		 * discard_zeroes_data is 0); write one disk of the
+		 * stripe (the stripe could be inconsistent again
+		 * depending on which disks are used to calculate
+		 * parity); the disk is broken; The stripe data of this
+		 * disk is lost.
+		 *
+		 * We only allow DISCARD if the sysadmin has confirmed that
+		 * only safe devices are in use by setting a module parameter.
+		 * A better idea might be to turn DISCARD into WRITE_ZEROES
+		 * requests, as that is required to be safe.
+		 */
+		if (devices_handle_discard_safely &&
 		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
 		    mddev->queue->limits.discard_granularity >= stripe)
 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 493eb10ce580..4c54ad34e17a 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -167,8 +167,6 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 	blk_queue_max_discard_sectors(q, max_discard);
-	if (card->erased_byte == 0 && !mmc_can_discard(card))
-		q->limits.discard_zeroes_data = 1;
 	q->limits.discard_granularity = card->pref_erase << 9;
 	/* granularity must not be greater than max. discard */
 	if (card->pref_erase > max_discard)
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index e992a7f8a16f..2b32b88949ba 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -267,7 +267,7 @@ static void sdio_release_func(struct device *dev)
 	sdio_free_func_cis(func);
 
 	kfree(func->info);
-
+	kfree(func->tmpbuf);
 	kfree(func);
 }
 
@@ -282,6 +282,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card)
 	if (!func)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * allocate buffer separately to make sure it's properly aligned for
+	 * DMA usage (incl. 64 bit DMA)
+	 */
+	func->tmpbuf = kmalloc(4, GFP_KERNEL);
+	if (!func->tmpbuf) {
+		kfree(func);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	func->card = card;
 
 	device_initialize(&func->dev);
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index a9ac0b457313..8718432751c5 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,6 +22,7 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
@@ -1621,10 +1622,16 @@ static void dw_mci_init_card(struct mmc_host *mmc, struct mmc_card *card)
 
 		if (card->type == MMC_TYPE_SDIO ||
 		    card->type == MMC_TYPE_SD_COMBO) {
-			set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+				pm_runtime_get_noresume(mmc->parent);
+				set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			}
 			clk_en_a = clk_en_a_old & ~clken_low_pwr;
 		} else {
-			clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			if (test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) {
+				pm_runtime_put_noidle(mmc->parent);
+				clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags);
+			}
 			clk_en_a = clk_en_a_old | clken_low_pwr;
 		}
 
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 7123ef96ed18..445fc47dc3e7 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
 
 	switch (uhs) {
 	case MMC_TIMING_UHS_SDR50:
+	case MMC_TIMING_UHS_DDR50:
 		pinctrl = imx_data->pins_100mhz;
 		break;
 	case MMC_TIMING_UHS_SDR104:
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 66a9dedd1062..1517da3ddd7d 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -46,7 +46,7 @@
 
 #include "mtdcore.h"
 
-static struct backing_dev_info *mtd_bdi;
+struct backing_dev_info *mtd_bdi;
 
 #ifdef CONFIG_PM_SLEEP
 
@@ -496,11 +496,9 @@ int add_mtd_device(struct mtd_info *mtd)
 	 * mtd_device_parse_register() multiple times on the same master MTD,
 	 * especially with CONFIG_MTD_PARTITIONED_MASTER=y.
 	 */
-	if (WARN_ONCE(mtd->backing_dev_info, "MTD already registered\n"))
+	if (WARN_ONCE(mtd->dev.type, "MTD already registered\n"))
 		return -EEXIST;
 
-	mtd->backing_dev_info = mtd_bdi;
-
 	BUG_ON(mtd->writesize == 0);
 	mutex_lock(&mtd_table_mutex);
 
@@ -1775,13 +1773,18 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name)
 	struct backing_dev_info *bdi;
 	int ret;
 
-	bdi = kzalloc(sizeof(*bdi), GFP_KERNEL);
+	bdi = bdi_alloc(GFP_KERNEL);
 	if (!bdi)
 		return ERR_PTR(-ENOMEM);
 
-	ret = bdi_setup_and_register(bdi, name);
+	bdi->name = name;
+	/*
+	 * We put '-0' suffix to the name to get the same name format as we
+	 * used to get. Since this is called only once, we get a unique name. 
+	 */
+	ret = bdi_register(bdi, "%.28s-0", name);
 	if (ret)
-		kfree(bdi);
+		bdi_put(bdi);
 
 	return ret ? ERR_PTR(ret) : bdi;
 }
@@ -1813,8 +1816,7 @@ static int __init init_mtd(void)
 out_procfs:
 	if (proc_mtd)
 		remove_proc_entry("mtd", NULL);
-	bdi_destroy(mtd_bdi);
-	kfree(mtd_bdi);
+	bdi_put(mtd_bdi);
 err_bdi:
 	class_unregister(&mtd_class);
 err_reg:
@@ -1828,8 +1830,7 @@ static void __exit cleanup_mtd(void)
 	if (proc_mtd)
 		remove_proc_entry("mtd", NULL);
 	class_unregister(&mtd_class);
-	bdi_destroy(mtd_bdi);
-	kfree(mtd_bdi);
+	bdi_put(mtd_bdi);
 	idr_destroy(&mtd_idr);
 }
 
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 20c02a3b7417..e43fea896d1e 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -18,6 +18,7 @@
 #include <linux/ctype.h>
 #include <linux/slab.h>
 #include <linux/major.h>
+#include <linux/backing-dev.h>
 
 /*
  * compare superblocks to see if they're equivalent
@@ -38,6 +39,8 @@ static int get_sb_mtd_compare(struct super_block *sb, void *_mtd)
 	return 0;
 }
 
+extern struct backing_dev_info *mtd_bdi;
+
 /*
  * mark the superblock by the MTD device it is using
  * - set the device number to be the correct MTD block device for pesuperstence
@@ -49,7 +52,8 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd)
 
 	sb->s_mtd = mtd;
 	sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
-	sb->s_bdi = mtd->backing_dev_info;
+	sb->s_bdi = bdi_get(mtd_bdi);
+
 	return 0;
 }
 
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index c80869e60909..51f2be8889b5 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -347,7 +347,7 @@ static int ubiblock_init_request(void *data, struct request *req,
 	return 0;
 }
 
-static struct blk_mq_ops ubiblock_mq_ops = {
+static const struct blk_mq_ops ubiblock_mq_ops = {
 	.queue_rq       = ubiblock_queue_rq,
 	.init_request	= ubiblock_init_request,
 };
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 0134ba32a057..39712560b4c1 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -148,11 +148,11 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
 			return err;
 	}
 
-	if (bytes == 0) {
-		err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
-		if (err)
-			return err;
+	err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
+	if (err)
+		return err;
 
+	if (bytes == 0) {
 		err = clear_update_marker(ubi, vol, 0);
 		if (err)
 			return err;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8a4ba8b88e52..34481c9be1d1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1104,11 +1104,11 @@ static void bond_compute_features(struct bonding *bond)
 		gso_max_size = min(gso_max_size, slave->dev->gso_max_size);
 		gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs);
 	}
+	bond_dev->hard_header_len = max_hard_header_len;
 
 done:
 	bond_dev->vlan_features = vlan_features;
 	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
-	bond_dev->hard_header_len = max_hard_header_len;
 	bond_dev->gso_max_segs = gso_max_segs;
 	netif_set_gso_max_size(bond_dev, gso_max_size);
 
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 8483a40e7e9e..5f9e0e6301d0 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -72,6 +72,8 @@ config CAN_PEAK_USB
 	  PCAN-USB Pro         dual CAN 2.0b channels USB adapter
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
+	  PCAN-Chip USB        CAN-FD to USB stamp module
+	  PCAN-USB X6          6 CAN-FD channels USB adapter
 
 	  (see also http://www.peak-system.com).
 
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 300349fe8dc0..eecee7f8dfb7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = {
 static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 {
 	struct gs_can *dev = netdev_priv(netdev);
-	struct gs_identify_mode imode;
+	struct gs_identify_mode *imode;
 	int rc;
 
+	imode = kmalloc(sizeof(*imode), GFP_KERNEL);
+
+	if (!imode)
+		return -ENOMEM;
+
 	if (do_identify)
-		imode.mode = GS_CAN_IDENTIFY_ON;
+		imode->mode = GS_CAN_IDENTIFY_ON;
 	else
-		imode.mode = GS_CAN_IDENTIFY_OFF;
+		imode->mode = GS_CAN_IDENTIFY_OFF;
 
 	rc = usb_control_msg(interface_to_usbdev(dev->iface),
 			     usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 			     USB_RECIP_INTERFACE,
 			     dev->channel,
 			     0,
-			     &imode,
-			     sizeof(imode),
+			     imode,
+			     sizeof(*imode),
 			     100);
 
+	kfree(imode);
+
 	return (rc > 0) ? 0 : rc;
 }
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 0b0302af3bd2..57913dbbae0a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = {
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
+	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
 	{} /* Terminating entry */
 };
@@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
 	&pcan_usb_pro,
 	&pcan_usb_fd,
 	&pcan_usb_pro_fd,
+	&pcan_usb_chip,
 	&pcan_usb_x6,
 };
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 3cbfb069893d..c01316cac354 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -27,6 +27,7 @@
 #define PCAN_USBPRO_PRODUCT_ID		0x000d
 #define PCAN_USBPROFD_PRODUCT_ID	0x0011
 #define PCAN_USBFD_PRODUCT_ID		0x0012
+#define PCAN_USBCHIP_PRODUCT_ID		0x0013
 #define PCAN_USBX6_PRODUCT_ID		0x0014
 
 #define PCAN_USB_DRIVER_NAME		"peak_usb"
@@ -90,6 +91,7 @@ struct peak_usb_adapter {
 extern const struct peak_usb_adapter pcan_usb;
 extern const struct peak_usb_adapter pcan_usb_pro;
 extern const struct peak_usb_adapter pcan_usb_fd;
+extern const struct peak_usb_adapter pcan_usb_chip;
 extern const struct peak_usb_adapter pcan_usb_pro_fd;
 extern const struct peak_usb_adapter pcan_usb_x6;
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 304732550f0a..528d3bb4917f 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
 };
 
+/* describes the PCAN-CHIP USB */
+static const struct can_bittiming_const pcan_usb_chip_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TSLOW_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TSLOW_BRP_BITS),
+	.brp_inc = 1,
+};
+
+static const struct can_bittiming_const pcan_usb_chip_data_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TFAST_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TFAST_BRP_BITS),
+	.brp_inc = 1,
+};
+
+const struct peak_usb_adapter pcan_usb_chip = {
+	.name = "PCAN-Chip USB",
+	.device_id = PCAN_USBCHIP_PRODUCT_ID,
+	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
+	.ctrlmode_supported = CAN_CTRLMODE_FD |
+		CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+	.clock = {
+		.freq = PCAN_UFD_CRYSTAL_HZ,
+	},
+	.bittiming_const = &pcan_usb_chip_const,
+	.data_bittiming_const = &pcan_usb_chip_data_const,
+
+	/* size of device private data */
+	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
+
+	/* timestamps usage */
+	.ts_used_bits = 32,
+	.ts_period = 1000000, /* calibration period in ts. */
+	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
+	.us_per_ts_shift = 0,
+
+	/* give here messages in/out endpoints */
+	.ep_msg_in = PCAN_USBPRO_EP_MSGIN,
+	.ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0},
+
+	/* size of rx/tx usb buffers */
+	.rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE,
+	.tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE,
+
+	/* device callbacks */
+	.intf_probe = pcan_usb_pro_probe,	/* same as PCAN-USB Pro */
+	.dev_init = pcan_usb_fd_init,
+
+	.dev_exit = pcan_usb_fd_exit,
+	.dev_free = pcan_usb_fd_free,
+	.dev_set_bus = pcan_usb_fd_set_bus,
+	.dev_set_bittiming = pcan_usb_fd_set_bittiming_slow,
+	.dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast,
+	.dev_decode_buf = pcan_usb_fd_decode_buf,
+	.dev_start = pcan_usb_fd_start,
+	.dev_stop = pcan_usb_fd_stop,
+	.dev_restart_async = pcan_usb_fd_restart_async,
+	.dev_encode_msg = pcan_usb_fd_encode_msg,
+
+	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
+};
+
 /* describes the PCAN-USB Pro FD adapter */
 static const struct can_bittiming_const pcan_usb_pro_fd_const = {
 	.name = "pcan_usb_pro_fd",
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 8cf4801994e8..fa0eece21eef 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid,
 
 static void b53_set_forwarding(struct b53_device *dev, int enable)
 {
+	struct dsa_switch *ds = dev->ds;
 	u8 mgmt;
 
 	b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
@@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 		mgmt &= ~SM_SW_FWD_EN;
 
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
+
+	/* Include IMP port in dumb forwarding mode when no tagging protocol is
+	 * set
+	 */
+	if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) {
+		b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt);
+		mgmt |= B53_MII_DUMB_FWDG_EN;
+		b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
+	}
 }
 
 static void b53_enable_vlan(struct b53_device *dev, bool enable)
@@ -598,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev)
 
 static int b53_switch_reset(struct b53_device *dev)
 {
-	u8 mgmt;
+	unsigned int timeout = 1000;
+	u8 mgmt, reg;
 
 	b53_switch_reset_gpio(dev);
 
@@ -607,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev)
 		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00);
 	}
 
+	/* This is specific to 58xx devices here, do not use is58xx() which
+	 * covers the larger Starfigther 2 family, including 7445/7278 which
+	 * still use this driver as a library and need to perform the reset
+	 * earlier.
+	 */
+	if (dev->chip_id == BCM58XX_DEVICE_ID) {
+		b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+		reg |= SW_RST | EN_SW_RST | EN_CH_RST;
+		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg);
+
+		do {
+			b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
+			if (!(reg & SW_RST))
+				break;
+
+			usleep_range(1000, 2000);
+		} while (timeout-- > 0);
+
+		if (timeout == 0)
+			return -ETIMEDOUT;
+	}
+
 	b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt);
 
 	if (!(mgmt & SM_SW_FWD_EN)) {
@@ -1731,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
 		.arl_entries = 4,
-		.cpu_port = B53_CPU_PORT_25,
+		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
 		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index 9fd24c418fa4..e5c86d44667a 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -104,6 +104,10 @@
 #define  B53_UC_FWD_EN			BIT(6)
 #define  B53_MC_FWD_EN			BIT(7)
 
+/* Switch control (8 bit) */
+#define B53_SWITCH_CTRL			0x22
+#define  B53_MII_DUMB_FWDG_EN		BIT(6)
+
 /* (16 bit) */
 #define B53_UC_FLOOD_MASK		0x32
 #define B53_MC_FLOOD_MASK		0x34
@@ -139,6 +143,7 @@
 /* Software reset register (8 bit) */
 #define B53_SOFTRESET			0x79
 #define   SW_RST			BIT(7)
+#define   EN_CH_RST			BIT(6)
 #define   EN_SW_RST			BIT(4)
 
 /* Fast Aging Control register (8 bit) */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 64a1095e4d14..a0ca68ce3fbb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev)
 	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
 	switch (sdevid) {
 	case PCI_SUBSYS_DEVID_81XX_BGX:
+	case PCI_SUBSYS_DEVID_81XX_RGX:
 		max_bgx_per_node = MAX_BGX_PER_CN81XX;
 		break;
 	case PCI_SUBSYS_DEVID_83XX_BGX:
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index c5080f2cead5..6b7fe6fdd13b 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -16,6 +16,7 @@
 /* Subsystem device IDs */
 #define PCI_SUBSYS_DEVID_88XX_BGX		0xA126
 #define PCI_SUBSYS_DEVID_81XX_BGX		0xA226
+#define PCI_SUBSYS_DEVID_81XX_RGX		0xA254
 #define PCI_SUBSYS_DEVID_83XX_BGX		0xA326
 
 #define    MAX_BGX_THUNDER			8 /* Max 2 nodes, 4 per node */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 9e757684816d..93949139e62c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_dma *itxd, *txd;
-	struct mtk_tx_buf *tx_buf;
+	struct mtk_tx_buf *itx_buf, *tx_buf;
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
@@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
 	txd4 |= fport;
 
-	tx_buf = mtk_desc_to_tx_buf(ring, itxd);
-	memset(tx_buf, 0, sizeof(*tx_buf));
+	itx_buf = mtk_desc_to_tx_buf(ring, itxd);
+	memset(itx_buf, 0, sizeof(*itx_buf));
 
 	if (gso)
 		txd4 |= TX_DMA_TSO;
@@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 		return -ENOMEM;
 
 	WRITE_ONCE(itxd->txd1, mapped_addr);
-	tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
-	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
-	dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+	itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+	itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+			  MTK_TX_FLAGS_FPORT1;
+	dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
+	dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
 
 	/* TX SG offload */
 	txd = itxd;
@@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 					       last_frag * TX_DMA_LS0));
 			WRITE_ONCE(txd->txd4, fport);
 
-			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf = mtk_desc_to_tx_buf(ring, txd);
 			memset(tx_buf, 0, sizeof(*tx_buf));
-
+			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+			tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+					 MTK_TX_FLAGS_FPORT1;
+
 			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
 			dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
 			frag_size -= frag_map_size;
@@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	/* store skb to cleanup */
-	tx_buf->skb = skb;
+	itx_buf->skb = skb;
 
 	WRITE_ONCE(itxd->txd4, txd4);
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
@@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 
 	while ((cpu != dma) && budget) {
 		u32 next_cpu = desc->txd2;
-		int mac;
+		int mac = 0;
 
 		desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
 		if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
 			break;
 
-		mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
-		       TX_DMA_FPORT_MASK;
-		mac--;
-
 		tx_buf = mtk_desc_to_tx_buf(ring, desc);
+		if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
+			mac = 1;
+
 		skb = tx_buf->skb;
 		if (!skb) {
 			condition = 1;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 99b1c8e9f16f..08285a96ff70 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -406,12 +406,18 @@ struct mtk_hw_stats {
 	struct u64_stats_sync	syncp;
 };
 
-/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
- * memory was allocated so that it can be freed properly
- */
 enum mtk_tx_flags {
+	/* PDMA descriptor can point at 1-2 segments. This enum allows us to
+	 * track how memory was allocated so that it can be freed properly.
+	 */
 	MTK_TX_FLAGS_SINGLE0	= 0x01,
 	MTK_TX_FLAGS_PAGE0	= 0x02,
+
+	/* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
+	 * SKB out instead of looking up through hardware TX descriptor.
+	 */
+	MTK_TX_FLAGS_FPORT0	= 0x04,
+	MTK_TX_FLAGS_FPORT1	= 0x08,
 };
 
 /* This enum allows us to identify how the clock is defined on the array of the
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index dc52053128bc..3d9490cd2db1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -90,7 +90,7 @@
 #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
 
 #define MLX5_UMR_ALIGN				(2048)
-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(128)
+#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(256)
 
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d55fff0ba388..26fc77e80f7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i
 	int idx = 0;
 	int err = 0;
 
+	info->data = MAX_NUM_OF_ETHTOOL_RULES;
 	while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
 		err = mlx5e_ethtool_get_flow(priv, info, location);
 		if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 66c133757a5e..15cc7b469d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -174,7 +174,7 @@ unlock:
 
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
-	struct mlx5e_sw_stats *s = &priv->stats.sw;
+	struct mlx5e_sw_stats temp, *s = &temp;
 	struct mlx5e_rq_stats *rq_stats;
 	struct mlx5e_sq_stats *sq_stats;
 	u64 tx_offload_none = 0;
@@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
 				priv->stats.pport.phy_counters,
 				counter_set.phys_layer_cntrs.link_down_events);
+	memcpy(&priv->stats.sw, s, sizeof(*s));
 }
 
 static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
@@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
 
-	memset(out, 0, outlen);
 	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fade7233dac5..5436866798f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
 	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
 	    rep->vport != FDB_UPLINK_VPORT) {
-		if (min_inline > esw->offloads.inline_mode) {
+		if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+		    esw->offloads.inline_mode < min_inline) {
 			netdev_warn(priv->netdev,
 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
 				    min_inline, esw->offloads.inline_mode);
@@ -785,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 	return 0;
 }
 
-static int gen_vxlan_header_ipv4(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 __be32 daddr,
-				 __be32 saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  __be32 daddr,
+				  __be32 saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
@@ -817,20 +817,17 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
-static int gen_vxlan_header_ipv6(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 struct in6_addr *daddr,
-				 struct in6_addr *saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  struct in6_addr *daddr,
+				  struct in6_addr *saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
@@ -852,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
@@ -862,13 +857,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 					  struct net_device **out_dev)
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, ttl, err;
 	struct neighbour *n = NULL;
 	struct flowi4 fl4 = {};
 	char *encap_header;
+	int ttl, err;
+
+	if (max_encap_size < ipv4_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv4_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -903,11 +905,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   fl4.daddr,
-						   fl4.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv4(*out_dev, encap_header,
+				      ipv4_encap_size, e->h_dest, ttl,
+				      fl4.daddr,
+				      fl4.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -915,7 +917,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv4_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);
@@ -930,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, err, ttl = 0;
 	struct neighbour *n = NULL;
 	struct flowi6 fl6 = {};
 	char *encap_header;
+	int err, ttl = 0;
+
+	if (max_encap_size < ipv6_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv6_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -972,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   &fl6.daddr,
-						   &fl6.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv6(*out_dev, encap_header,
+				      ipv6_encap_size, e->h_dest, ttl,
+				      &fl6.daddr,
+				      &fl6.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -984,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv6_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 307ec6c5fd3b..d111cebca9f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int num_vports = esw->enabled_vports;
-	int err;
-	int vport;
+	int err, vport;
 	u8 mlx5_mode;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
@@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+			return 0;
+		/* fall through */
+	case MLX5_CAP_INLINE_MODE_L2:
+		esw_warn(dev, "Inline mode can't be set\n");
 		return -EOPNOTSUPP;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		break;
+	}
 
 	if (esw->offloads.num_flows > 0) {
 		esw_warn(dev, "Can't set inline mode when flows are configured\n");
@@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
-
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
 
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 {
+	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 	struct mlx5_core_dev *dev = esw->dev;
 	int vport;
-	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
@@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		mlx5_mode = MLX5_INLINE_MODE_NONE;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_L2:
+		mlx5_mode = MLX5_INLINE_MODE_L2;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		goto query_vports;
+	}
 
+query_vports:
 	for (vport = 1; vport <= nvfs; vport++) {
 		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
 		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
@@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 		prev_mlx5_mode = mlx5_mode;
 	}
 
+out:
 	*mode = mlx5_mode;
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 60154a175bd3..0ad66324247f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (err) {
 		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
 			FW_INIT_TIMEOUT_MILI);
-		goto out_err;
+		goto err_cmd_cleanup;
 	}
 
 	err = mlx5_core_enable_hca(dev, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 2e6b0f290ddc..222b25908d01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref)
 	struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
 
 	list_del(&up->list);
+	iounmap(up->map);
 	if (mlx5_cmd_free_uar(up->mdev, up->index))
 		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
 	kfree(up->reg_bitmap);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 5bd36a4a8fcd..cfdadb658ade 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -64,11 +64,11 @@
 	((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7)
 
 static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = {
-	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}
+	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI},
+	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE},
+	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH},
 };
 
 static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
@@ -583,6 +583,13 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
 		   p_params->ets_cbs,
 		   p_ets->pri_tc_tbl[0], p_params->max_ets_tc);
 
+	if (p_params->ets_enabled && !p_params->max_ets_tc) {
+		p_params->max_ets_tc = QED_MAX_PFC_PRIORITIES;
+		DP_VERBOSE(p_hwfn, QED_MSG_DCB,
+			   "ETS params: max_ets_tc is forced to %d\n",
+		p_params->max_ets_tc);
+	}
+
 	/* 8 bit tsa and bw data corresponding to each of the 8 TC's are
 	 * encoded in a type u32 array of size 2.
 	 */
@@ -1001,6 +1008,8 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn,
 	u8 pfc_map = 0;
 	int i;
 
+	*pfc &= ~DCBX_PFC_ERROR_MASK;
+
 	if (p_params->pfc.willing)
 		*pfc |= DCBX_PFC_WILLING_MASK;
 	else
@@ -1255,7 +1264,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn,
 {
 	struct qed_dcbx_get *dcbx_info;
 
-	dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+	dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_ATOMIC);
 	if (!dcbx_info)
 		return NULL;
 
@@ -2073,6 +2082,8 @@ static int qed_dcbnl_ieee_setpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
 	for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++)
 		dcbx_set.config.params.pfc.prio[i] = !!(pfc->pfc_en & BIT(i));
 
+	dcbx_set.config.params.pfc.max_tc = pfc->pfc_cap;
+
 	ptt = qed_ptt_acquire(hwfn);
 	if (!ptt)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 8cfc4a54f2dc..3cd7989c007d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		spin_unlock_irqrestore(&priv->lock, flags);
 		return NETDEV_TX_BUSY;
 	}
-	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
-	priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
 
 	if (skb_put_padto(skb, ETH_ZLEN))
-		goto drop;
+		goto exit;
+
+	entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
+	priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
 
 	buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
 		 entry / NUM_TX_DESC * DPTR_ALIGN;
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 54248775f227..f68c4db656ed 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1127,12 +1127,70 @@ static struct mdiobb_ops bb_ops = {
 	.get_mdio_data = sh_get_mdio,
 };
 
+/* free Tx skb function */
+static int sh_eth_tx_free(struct net_device *ndev, bool sent_only)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	struct sh_eth_txdesc *txdesc;
+	int free_num = 0;
+	int entry;
+	bool sent;
+
+	for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
+		entry = mdp->dirty_tx % mdp->num_tx_ring;
+		txdesc = &mdp->tx_ring[entry];
+		sent = !(txdesc->status & cpu_to_le32(TD_TACT));
+		if (sent_only && !sent)
+			break;
+		/* TACT bit must be checked before all the following reads */
+		dma_rmb();
+		netif_info(mdp, tx_done, ndev,
+			   "tx entry %d status 0x%08x\n",
+			   entry, le32_to_cpu(txdesc->status));
+		/* Free the original skb. */
+		if (mdp->tx_skbuff[entry]) {
+			dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
+					 le32_to_cpu(txdesc->len) >> 16,
+					 DMA_TO_DEVICE);
+			dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
+			mdp->tx_skbuff[entry] = NULL;
+			free_num++;
+		}
+		txdesc->status = cpu_to_le32(TD_TFP);
+		if (entry >= mdp->num_tx_ring - 1)
+			txdesc->status |= cpu_to_le32(TD_TDLE);
+
+		if (sent) {
+			ndev->stats.tx_packets++;
+			ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
+		}
+	}
+	return free_num;
+}
+
 /* free skb and descriptor buffer */
 static void sh_eth_ring_free(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ringsize, i;
 
+	if (mdp->rx_ring) {
+		for (i = 0; i < mdp->num_rx_ring; i++) {
+			if (mdp->rx_skbuff[i]) {
+				struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i];
+
+				dma_unmap_single(&ndev->dev,
+						 le32_to_cpu(rxdesc->addr),
+						 ALIGN(mdp->rx_buf_sz, 32),
+						 DMA_FROM_DEVICE);
+			}
+		}
+		ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
+		dma_free_coherent(NULL, ringsize, mdp->rx_ring,
+				  mdp->rx_desc_dma);
+		mdp->rx_ring = NULL;
+	}
+
 	/* Free Rx skb ringbuffer */
 	if (mdp->rx_skbuff) {
 		for (i = 0; i < mdp->num_rx_ring; i++)
@@ -1141,27 +1199,18 @@ static void sh_eth_ring_free(struct net_device *ndev)
 	kfree(mdp->rx_skbuff);
 	mdp->rx_skbuff = NULL;
 
-	/* Free Tx skb ringbuffer */
-	if (mdp->tx_skbuff) {
-		for (i = 0; i < mdp->num_tx_ring; i++)
-			dev_kfree_skb(mdp->tx_skbuff[i]);
-	}
-	kfree(mdp->tx_skbuff);
-	mdp->tx_skbuff = NULL;
-
-	if (mdp->rx_ring) {
-		ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
-		dma_free_coherent(NULL, ringsize, mdp->rx_ring,
-				  mdp->rx_desc_dma);
-		mdp->rx_ring = NULL;
-	}
-
 	if (mdp->tx_ring) {
+		sh_eth_tx_free(ndev, false);
+
 		ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
 		dma_free_coherent(NULL, ringsize, mdp->tx_ring,
 				  mdp->tx_desc_dma);
 		mdp->tx_ring = NULL;
 	}
+
+	/* Free Tx skb ringbuffer */
+	kfree(mdp->tx_skbuff);
+	mdp->tx_skbuff = NULL;
 }
 
 /* format skb and descriptor buffer */
@@ -1409,43 +1458,6 @@ static void sh_eth_dev_exit(struct net_device *ndev)
 	update_mac_address(ndev);
 }
 
-/* free Tx skb function */
-static int sh_eth_txfree(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct sh_eth_txdesc *txdesc;
-	int free_num = 0;
-	int entry;
-
-	for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
-		entry = mdp->dirty_tx % mdp->num_tx_ring;
-		txdesc = &mdp->tx_ring[entry];
-		if (txdesc->status & cpu_to_le32(TD_TACT))
-			break;
-		/* TACT bit must be checked before all the following reads */
-		dma_rmb();
-		netif_info(mdp, tx_done, ndev,
-			   "tx entry %d status 0x%08x\n",
-			   entry, le32_to_cpu(txdesc->status));
-		/* Free the original skb. */
-		if (mdp->tx_skbuff[entry]) {
-			dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
-					 le32_to_cpu(txdesc->len) >> 16,
-					 DMA_TO_DEVICE);
-			dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
-			mdp->tx_skbuff[entry] = NULL;
-			free_num++;
-		}
-		txdesc->status = cpu_to_le32(TD_TFP);
-		if (entry >= mdp->num_tx_ring - 1)
-			txdesc->status |= cpu_to_le32(TD_TDLE);
-
-		ndev->stats.tx_packets++;
-		ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
-	}
-	return free_num;
-}
-
 /* Packet receive function */
 static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 {
@@ -1690,7 +1702,7 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status)
 			   intr_status, mdp->cur_tx, mdp->dirty_tx,
 			   (u32)ndev->state, edtrr);
 		/* dirty buffer free */
-		sh_eth_txfree(ndev);
+		sh_eth_tx_free(ndev, true);
 
 		/* SH7712 BUG */
 		if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
@@ -1751,7 +1763,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev)
 		/* Clear Tx interrupts */
 		sh_eth_write(ndev, intr_status & cd->tx_check, EESR);
 
-		sh_eth_txfree(ndev);
+		sh_eth_tx_free(ndev, true);
 		netif_wake_queue(ndev);
 	}
 
@@ -2412,7 +2424,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	spin_lock_irqsave(&mdp->lock, flags);
 	if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) {
-		if (!sh_eth_txfree(ndev)) {
+		if (!sh_eth_tx_free(ndev, true)) {
 			netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n");
 			netif_stop_queue(ndev);
 			spin_unlock_irqrestore(&mdp->lock, flags);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 50d28261b6b9..b9cb697b2818 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 		free_cpumask_var(thread_mask);
 	}
 
+	if (count > EFX_MAX_RX_QUEUES) {
+		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+			       "Reducing number of rx queues from %u to %u.\n",
+			       count, EFX_MAX_RX_QUEUES);
+		count = EFX_MAX_RX_QUEUES;
+	}
+
 	/* If RSS is requested for the PF *and* VFs then we can't write RSS
 	 * table entries that are inaccessible to VFs
 	 */
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index ee14662415c5..a0c52e328102 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 #define EFX_RXQ_MIN_ENT		128U
 #define EFX_TXQ_MIN_ENT(efx)	(2 * efx_tx_max_skb_descs(efx))
 
-#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_35388(efx) ? \
+/* All EF10 architecture NICs steal one bit of the DMAQ size for various
+ * other purposes when counting TxQ entries, so we halve the queue size.
+ */
+#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_EF10(efx) ? \
 				 EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
 
 static inline bool efx_rss_enabled(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index f5e5cd1659a1..29614da91cbf 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
 		free_cpumask_var(thread_mask);
 	}
 
+	if (count > EF4_MAX_RX_QUEUES) {
+		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+			       "Reducing number of rx queues from %u to %u.\n",
+			       count, EF4_MAX_RX_QUEUES);
+		count = EF4_MAX_RX_QUEUES;
+	}
+
 	return count;
 }
 
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 103f827a1623..c67fa18b8121 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -16,6 +16,7 @@
  */
 
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9e631952b86f..48a541eb0af2 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -76,7 +76,7 @@ config TI_CPSW
 config TI_CPTS
 	bool "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW || TI_KEYSTONE_NETCP
-	depends on PTP_1588_CLOCK
+	depends on POSIX_TIMERS
 	---help---
 	  This driver supports the Common Platform Time Sync unit of
 	  the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
@@ -87,6 +87,8 @@ config TI_CPTS_MOD
 	tristate
 	depends on TI_CPTS
 	default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y
+	select NET_PTP_CLASSIFY
+	imply PTP_1588_CLOCK
 	default m
 
 config TI_KEYSTONE_NETCP
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index a45f98fa4aa7..3dadee1080b9 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev)
 			BUG_ON(lp->tx_skbs[i].skb != skb);
 #endif
 			if (skb) {
-				dev_kfree_skb(skb);
 				pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE);
+				dev_kfree_skb(skb);
 				lp->tx_skbs[i].skb = NULL;
 				lp->tx_skbs[i].skb_dma = 0;
 			}
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index f9f3dba7a588..db23cb36ae5c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -751,7 +751,6 @@ struct netvsc_device {
 	u32 send_section_cnt;
 	u32 send_section_size;
 	unsigned long *send_section_map;
-	int map_words;
 
 	/* Used for NetVSP initialization protocol */
 	struct completion channel_init_wait;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8dd0b8770328..15ef713d96c0 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device)
 	struct netvsc_device *net_device;
 	struct nvsp_message *init_packet;
 	struct net_device *ndev;
+	size_t map_words;
 	int node;
 
 	net_device = get_outbound_net_device(device);
@@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device)
 		   net_device->send_section_size, net_device->send_section_cnt);
 
 	/* Setup state for managing the send buffer. */
-	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
-					     BITS_PER_LONG);
+	map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
 
-	net_device->send_section_map = kcalloc(net_device->map_words,
-					       sizeof(ulong), GFP_KERNEL);
+	net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
 	if (net_device->send_section_map == NULL) {
 		ret = -ENOMEM;
 		goto cleanup;
@@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 	unsigned long *map_addr = net_device->send_section_map;
 	unsigned int i;
 
-	for_each_clear_bit(i, map_addr, net_device->map_words) {
+	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
 		if (sync_test_and_set_bit(i, map_addr) == 0)
 			return i;
 	}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index ff0a5ed3ca80..49ce4e9f4a0f 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
 
 static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
 					     unsigned char **iv,
-					     struct scatterlist **sg)
+					     struct scatterlist **sg,
+					     int num_frags)
 {
 	size_t size, iv_offset, sg_offset;
 	struct aead_request *req;
@@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
 
 	size = ALIGN(size, __alignof__(struct scatterlist));
 	sg_offset = size;
-	size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+	size += sizeof(struct scatterlist) * num_frags;
 
 	tmp = kmalloc(size, GFP_ATOMIC);
 	if (!tmp)
@@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 {
 	int ret;
 	struct scatterlist *sg;
+	struct sk_buff *trailer;
 	unsigned char *iv;
 	struct ethhdr *eth;
 	struct macsec_eth_header *hh;
@@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
+	ret = skb_cow_data(skb, 0, &trailer);
+	if (unlikely(ret < 0)) {
+		macsec_txsa_put(tx_sa);
+		kfree_skb(skb);
+		return ERR_PTR(ret);
+	}
+
+	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret);
 	if (!req) {
 		macsec_txsa_put(tx_sa);
 		kfree_skb(skb);
@@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 
 	macsec_fill_iv(iv, secy->sci, pn);
 
-	sg_init_table(sg, MAX_SKB_FRAGS + 1);
+	sg_init_table(sg, ret);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
 	if (tx_sc->encrypt) {
@@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 {
 	int ret;
 	struct scatterlist *sg;
+	struct sk_buff *trailer;
 	unsigned char *iv;
 	struct aead_request *req;
 	struct macsec_eth_header *hdr;
@@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
+	ret = skb_cow_data(skb, 0, &trailer);
+	if (unlikely(ret < 0)) {
+		kfree_skb(skb);
+		return ERR_PTR(ret);
+	}
+	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret);
 	if (!req) {
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
@@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	hdr = (struct macsec_eth_header *)skb->data;
 	macsec_fill_iv(iv, sci, ntohl(hdr->packet_number));
 
-	sg_init_table(sg, MAX_SKB_FRAGS + 1);
+	sg_init_table(sg, ret);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
 	if (hdr->tci_an & MACSEC_TCI_E) {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9261722960a7..b34eaaae03fd 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev)
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = macvlan_port_get_rtnl(dev);
+	struct sk_buff *skb;
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
@@ -1147,7 +1148,15 @@ static void macvlan_port_destroy(struct net_device *dev)
 	 * but we need to cancel it and purge left skbs if any.
 	 */
 	cancel_work_sync(&port->bc_work);
-	__skb_queue_purge(&port->bc_queue);
+
+	while ((skb = __skb_dequeue(&port->bc_queue))) {
+		const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
+
+		if (src)
+			dev_put(src->dev);
+
+		kfree_skb(skb);
+	}
 
 	kfree(port);
 }
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index e2460a57e4b1..ed0d10f54f26 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
 		skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
 		skb_queue_tail(&dp83640->rx_queue, skb);
 		schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
-	} else {
-		netif_rx_ni(skb);
 	}
 
 	return true;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070ca676..da5b39268370 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev)
 	if (priv->led_mode >= 0)
 		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
 
-	if (phy_interrupt_is_valid(phydev)) {
-		int ctl = phy_read(phydev, MII_BMCR);
-
-		if (ctl < 0)
-			return ctl;
-
-		ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
-		if (ret < 0)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -798,9 +787,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -940,9 +926,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -952,6 +935,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -971,6 +955,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9031_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= ksz9031_read_status,
@@ -989,9 +974,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1003,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1017,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 } };
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index a2bfc82e95d7..97ff1278167b 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 EXPORT_SYMBOL(phy_mii_ioctl);
 
 /**
- * phy_start_aneg - start auto-negotiation for this PHY device
+ * phy_start_aneg_priv - start auto-negotiation for this PHY device
  * @phydev: the phy_device struct
+ * @sync: indicate whether we should wait for the workqueue cancelation
  *
  * Description: Sanitizes the settings (if we're not autonegotiating
  *   them), and then calls the driver's config_aneg function.
  *   If the PHYCONTROL Layer is operating, we change the state to
  *   reflect the beginning of Auto-negotiation or forcing.
  */
-int phy_start_aneg(struct phy_device *phydev)
+static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 {
+	bool trigger = 0;
 	int err;
 
 	if (!phydev->drv)
@@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev)
 		}
 	}
 
+	/* Re-schedule a PHY state machine to check PHY status because
+	 * negotiation may already be done and aneg interrupt may not be
+	 * generated.
+	 */
+	if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
+		err = phy_aneg_done(phydev);
+		if (err > 0) {
+			trigger = true;
+			err = 0;
+		}
+	}
+
 out_unlock:
 	mutex_unlock(&phydev->lock);
+
+	if (trigger)
+		phy_trigger_machine(phydev, sync);
+
 	return err;
 }
+
+/**
+ * phy_start_aneg - start auto-negotiation for this PHY device
+ * @phydev: the phy_device struct
+ *
+ * Description: Sanitizes the settings (if we're not autonegotiating
+ *   them), and then calls the driver's config_aneg function.
+ *   If the PHYCONTROL Layer is operating, we change the state to
+ *   reflect the beginning of Auto-negotiation or forcing.
+ */
+int phy_start_aneg(struct phy_device *phydev)
+{
+	return phy_start_aneg_priv(phydev, true);
+}
 EXPORT_SYMBOL(phy_start_aneg);
 
 /**
@@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev)
  *   state machine runs.
  */
 
-static void phy_trigger_machine(struct phy_device *phydev, bool sync)
+void phy_trigger_machine(struct phy_device *phydev, bool sync)
 {
 	if (sync)
 		cancel_delayed_work_sync(&phydev->state_queue);
@@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work)
 	mutex_unlock(&phydev->lock);
 
 	if (needs_aneg)
-		err = phy_start_aneg(phydev);
+		err = phy_start_aneg_priv(phydev, false);
 	else if (do_suspend)
 		phy_suspend(phydev);
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f8c81f12d988..85c01247f2e3 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2361,8 +2361,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_OPTIONS_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
@@ -2634,8 +2636,10 @@ start_again:
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_PORT_LIST_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 3dd490f53e48..f28bd74ac275 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -369,7 +369,7 @@ config USB_NET_NET1080
 	  optionally with LEDs that indicate traffic
 
 config USB_NET_PLUSB
-	tristate "Prolific PL-2301/2302/25A1 based cables"
+	tristate "Prolific PL-2301/2302/25A1/27A1 based cables"
 	# if the handshake/init/reset problems, from original 'plusb',
 	# are ever resolved ... then remove "experimental"
 	depends on USB_USBNET
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index 8a40202c0a17..c4f1c363e24b 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -254,14 +254,9 @@ static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	tx_overhead = 0x40;
 
 	len = skb->len;
-	if (skb_headroom(skb) < tx_overhead) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, tx_overhead, 0, flags);
+	if (skb_cow_head(skb, tx_overhead)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	__skb_push(skb, tx_overhead);
diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index e221bfcee76b..947bea81d924 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 {
 	int len = skb->len;
 
-	if (skb_headroom(skb) < 2) {
-		struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
+	if (skb_cow_head(skb, 2)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 	skb_push(skb, 2);
 
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 4f2e8141dbe2..00067a0c51ca 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2534,13 +2534,6 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
 	SET_NETDEV_DEV(net, &interface->dev);
 	SET_NETDEV_DEVTYPE(net, &hso_type);
 
-	/* registering our net device */
-	result = register_netdev(net);
-	if (result) {
-		dev_err(&interface->dev, "Failed to register device\n");
-		goto exit;
-	}
-
 	/* start allocating */
 	for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
 		hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
@@ -2560,6 +2553,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
 
 	add_net_device(hso_dev);
 
+	/* registering our net device */
+	result = register_netdev(net);
+	if (result) {
+		dev_err(&interface->dev, "Failed to register device\n");
+		goto exit;
+	}
+
 	hso_log_port(hso_dev);
 
 	hso_create_rfkill(hso_dev, interface);
@@ -3279,9 +3279,9 @@ static void __exit hso_exit(void)
 	pr_info("unloaded\n");
 
 	tty_unregister_driver(tty_drv);
-	put_tty_driver(tty_drv);
 	/* deregister the usb driver */
 	usb_deregister(&hso_driver);
+	put_tty_driver(tty_drv);
 }
 
 /* Module definitions */
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 876f02f4945e..2a2c3edb6bad 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -803,18 +803,12 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb,
 	}
 
 	/* We now decide whether we can put our special header into the sk_buff */
-	if (skb_cloned(skb) || skb_headroom(skb) < 2) {
-		/* no such luck - we make our own */
-		struct sk_buff *copied_skb;
-		copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC);
-		dev_kfree_skb_irq(skb);
-		skb = copied_skb;
-		if (!copied_skb) {
-			kaweth->stats.tx_errors++;
-			netif_start_queue(net);
-			spin_unlock_irq(&kaweth->device_lock);
-			return NETDEV_TX_OK;
-		}
+	if (skb_cow_head(skb, 2)) {
+		kaweth->stats.tx_errors++;
+		netif_start_queue(net);
+		spin_unlock_irq(&kaweth->device_lock);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
 	}
 
 	private_header = (__le16 *)__skb_push(skb, 2);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 9889a70ff4f6..636f48f19d1e 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2607,14 +2607,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
 {
 	u32 tx_cmd_a, tx_cmd_b;
 
-	if (skb_headroom(skb) < TX_OVERHEAD) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	if (lan78xx_linearize(skb) < 0)
diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c
index 22e1a9a99a7d..6fe59373cba9 100644
--- a/drivers/net/usb/plusb.c
+++ b/drivers/net/usb/plusb.c
@@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev)
 }
 
 static const struct driver_info	prolific_info = {
-	.description =	"Prolific PL-2301/PL-2302/PL-25A1",
+	.description =	"Prolific PL-2301/PL-2302/PL-25A1/PL-27A1",
 	.flags =	FLAG_POINTTOPOINT | FLAG_NO_SETINT,
 		/* some PL-2302 versions seem to fail usb_set_interface() */
 	.reset =	pl_reset,
@@ -139,6 +139,17 @@ static const struct usb_device_id	products [] = {
 					 * Host-to-Host Cable
 					 */
 	.driver_info =  (unsigned long) &prolific_info,
+
+},
+
+/* super speed cables */
+{
+	USB_DEVICE(0x067b, 0x27a1),     /* PL-27A1, no eeprom
+					 * also: goobay Active USB 3.0
+					 * Data Link,
+					 * Unitek Y-3501
+					 */
+	.driver_info =  (unsigned long) &prolific_info,
 },
 
 	{ },		// END
@@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = {
 module_usb_driver(plusb_driver);
 
 MODULE_AUTHOR("David Brownell");
-MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver");
+MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 0b17b40d7a4f..190de9a90f73 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2203,13 +2203,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
 {
 	u32 tx_cmd_a, tx_cmd_b;
 
-	if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
-		struct sk_buff *skb2 =
-			skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 831aa33d078a..5f19fb0f025d 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2001,13 +2001,13 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
 	/* We do not advertise SG, so skbs should be already linearized */
 	BUG_ON(skb_shinfo(skb)->nr_frags);
 
-	if (skb_headroom(skb) < overhead) {
-		struct sk_buff *skb2 = skb_copy_expand(skb,
-			overhead, 0, flags);
+	/* Make writable and expand header space by overhead if required */
+	if (skb_cow_head(skb, overhead)) {
+		/* Must deallocate here as returning NULL to indicate error
+		 * means the skb won't be deallocated in the caller.
+		 */
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	if (csum) {
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 4a1e9c489f1f..aadfe1d1c37e 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 
 	len = skb->len;
 
-	if (skb_headroom(skb) < SR_TX_OVERHEAD) {
-		struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
+	if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
-		skb = skb2;
-		if (!skb)
-			return NULL;
+		return NULL;
 	}
 
 	__skb_push(skb, SR_TX_OVERHEAD);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index d6988db1930d..7d909c8183e9 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
 		goto nla_put_failure;
 
 	/* rule only needs to appear once */
-	nlh->nlmsg_flags &= NLM_F_EXCL;
+	nlh->nlmsg_flags |= NLM_F_EXCL;
 
 	frh = nlmsg_data(nlh);
 	memset(frh, 0, sizeof(*frh));
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9583a5f58a1d..d5e0906262ea 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
-unsigned int nvme_max_retries = 5;
-module_param_named(max_retries, nvme_max_retries, uint, 0644);
+static u8 nvme_max_retries = 5;
+module_param_named(max_retries, nvme_max_retries, byte, 0644);
 MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-EXPORT_SYMBOL_GPL(nvme_max_retries);
 
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
@@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
 
+static bool force_apst;
+module_param(force_apst, bool, 0644);
+MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
+static int nvme_error_status(struct request *req)
+{
+	switch (nvme_req(req)->status & 0x7ff) {
+	case NVME_SC_SUCCESS:
+		return 0;
+	case NVME_SC_CAP_EXCEEDED:
+		return -ENOSPC;
+	default:
+		return -EIO;
+
+	/*
+	 * XXX: these errors are a nasty side-band protocol to
+	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
+	 * anywhere..
+	 */
+	case NVME_SC_CMD_SEQ_ERROR:
+		return -EILSEQ;
+	case NVME_SC_ONCS_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case NVME_SC_WRITE_FAULT:
+	case NVME_SC_READ_ERROR:
+	case NVME_SC_UNWRITTEN_BLOCK:
+		return -ENODATA;
+	}
+}
+
+static inline bool nvme_req_needs_retry(struct request *req)
+{
+	if (blk_noretry_request(req))
+		return false;
+	if (nvme_req(req)->status & NVME_SC_DNR)
+		return false;
+	if (jiffies - req->start_time >= req->timeout)
+		return false;
+	if (nvme_req(req)->retries >= nvme_max_retries)
+		return false;
+	return true;
+}
+
+void nvme_complete_rq(struct request *req)
+{
+	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
+		nvme_req(req)->retries++;
+		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		return;
+	}
+
+	blk_mq_end_request(req, nvme_error_status(req));
+}
+EXPORT_SYMBOL_GPL(nvme_complete_rq);
+
 void nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
 	int status;
@@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved)
 	status = NVME_SC_ABORT_REQ;
 	if (blk_queue_dying(req->q))
 		status |= NVME_SC_DNR;
-	blk_mq_complete_request(req, status);
+	nvme_req(req)->status = status;
+	blk_mq_complete_request(req);
+
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
 
@@ -205,12 +261,6 @@ fail:
 	return NULL;
 }
 
-void nvme_requeue_req(struct request *req)
-{
-	blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
-}
-EXPORT_SYMBOL_GPL(nvme_requeue_req);
-
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid)
 {
@@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
+	if (!(req->rq_flags & RQF_DONTPREP)) {
+		nvme_req(req)->retries = 0;
+		nvme_req(req)->flags = 0;
+		req->rq_flags |= RQF_DONTPREP;
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
 	case REQ_OP_DRV_OUT:
@@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	case REQ_OP_FLUSH:
 		nvme_setup_flush(ns, cmd);
 		break;
+	case REQ_OP_WRITE_ZEROES:
+		/* currently only aliased to deallocate for a few ctrls: */
 	case REQ_OP_DISCARD:
 		ret = nvme_setup_discard(ns, req, cmd);
 		break;
@@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	blk_execute_rq(req->q, NULL, req, at_head);
 	if (result)
 		*result = nvme_req(req)->result;
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
  out:
 	blk_mq_free_request(req);
 	return ret;
@@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 	}
  submit:
 	blk_execute_rq(req->q, disk, req, 0);
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
 	if (result)
 		*result = le32_to_cpu(nvme_req(req)->result.u32);
 	if (meta && !ret && !write) {
@@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
-		ns->queue->limits.discard_zeroes_data = 1;
-	else
-		ns->queue->limits.discard_zeroes_data = 0;
-
 	ns->queue->limits.discard_alignment = logical_block_size;
 	ns->queue->limits.discard_granularity = logical_block_size;
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+
+	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+		blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
 }
 
 static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
@@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * heuristic: we are willing to spend at most 2% of the time
 	 * transitioning between power states.  Therefore, when running
 	 * in any given state, we will enter the next lower-power
-	 * non-operational state after waiting 100 * (enlat + exlat)
+	 * non-operational state after waiting 50 * (enlat + exlat)
 	 * microseconds, as long as that state's total latency is under
 	 * the requested maximum latency.
 	 *
@@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 
 	unsigned apste;
 	struct nvme_feat_auto_pst *table;
+	u64 max_lat_us = 0;
+	int max_ps = -1;
 	int ret;
 
 	/*
@@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	if (ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
 		apste = 0;
+		dev_dbg(ctrl->device, "APST disabled\n");
 	} else {
 		__le64 target = cpu_to_le64(0);
 		int state;
@@ -1316,6 +1381,14 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 				table->entries[state] = target;
 
 			/*
+			 * Don't allow transitions to the deepest state
+			 * if it's quirked off.
+			 */
+			if (state == ctrl->npss &&
+			    (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
+				continue;
+
+			/*
 			 * Is this state a useful non-operational state for
 			 * higher-power states to autonomously transition to?
 			 */
@@ -1340,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 
 			target = cpu_to_le64((state << 3) |
 					     (transition_ms << 8));
+
+			if (max_ps == -1)
+				max_ps = state;
+
+			if (total_latency_us > max_lat_us)
+				max_lat_us = total_latency_us;
 		}
 
 		apste = 1;
+
+		if (max_ps == -1) {
+			dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
+		} else {
+			dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
+				max_ps, max_lat_us, (int)sizeof(*table), table);
+		}
 	}
 
 	ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
@@ -1387,16 +1473,15 @@ struct nvme_core_quirk_entry {
 };
 
 static const struct nvme_core_quirk_entry core_quirks[] = {
-	/*
-	 * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
-	 * the controller to go out to lunch.  It dies when the watchdog
-	 * timer reads CSTS and gets 0xffffffff.
-	 */
 	{
-		.vid = 0x144d,
-		.fr = "BXW75D0Q",
+		/*
+		 * This Toshiba device seems to die using any APST states.  See:
+		 * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11
+		 */
+		.vid = 0x1179,
+		.mn = "THNSF5256GPUK TOSHIBA",
 		.quirks = NVME_QUIRK_NO_APST,
-	},
+	}
 };
 
 /* match is null-terminated but idstr is space-padded. */
@@ -1481,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		}
 	}
 
+	if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
+		dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+		ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
+	}
+
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->vid = le16_to_cpu(id->vid);
 	ctrl->oncs = le16_to_cpup(&id->oncs);
@@ -1503,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
 	ctrl->npss = id->npss;
 	prev_apsta = ctrl->apsta;
-	ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
+		if (force_apst && id->apsta) {
+			dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+			ctrl->apsta = 1;
+		} else {
+			ctrl->apsta = 0;
+		}
+	} else {
+		ctrl->apsta = id->apsta;
+	}
 	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
 	if (ctrl->ops->is_fabrics) {
@@ -2386,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_freeze_queue_start(ns->queue);
+		blk_freeze_queue_start(ns->queue);
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5b7386f69f4d..990e6fb32a63 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 }
 EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
 
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->opts->max_reconnects != -1 &&
+	    ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
+
 /**
  * nvmf_register_transport() - NVMe Fabrics Library registration function.
  * @ops:	Transport ops instance to be registered to the
@@ -533,6 +543,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_QUEUE_SIZE,		"queue_size=%d"		},
 	{ NVMF_OPT_NR_IO_QUEUES,	"nr_io_queues=%d"	},
 	{ NVMF_OPT_RECONNECT_DELAY,	"reconnect_delay=%d"	},
+	{ NVMF_OPT_CTRL_LOSS_TMO,	"ctrl_loss_tmo=%d"	},
 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
@@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	char *options, *o, *p;
 	int token, ret = 0;
 	size_t nqnlen  = 0;
+	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
 
 	/* Set defaults */
 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->kato = token;
 			break;
+		case NVMF_OPT_CTRL_LOSS_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token < 0)
+				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
+			ctrl_loss_tmo = token;
+			break;
 		case NVMF_OPT_HOSTNQN:
 			if (opts->host) {
 				pr_err("hostnqn already user-assigned: %s\n",
@@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		}
 	}
 
+	if (ctrl_loss_tmo < 0)
+		opts->max_reconnects = -1;
+	else
+		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+						opts->reconnect_delay);
+
 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
 		opts->host = nvmf_default_host;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 156018182ce4..f5a9c1fb186f 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -21,6 +21,8 @@
 #define NVMF_MAX_QUEUE_SIZE	1024
 #define NVMF_DEF_QUEUE_SIZE	128
 #define NVMF_DEF_RECONNECT_DELAY	10
+/* default to 600 seconds of reconnect attempts before giving up */
+#define NVMF_DEF_CTRL_LOSS_TMO		600
 
 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -53,6 +55,7 @@ enum {
 	NVMF_OPT_HOSTNQN	= 1 << 8,
 	NVMF_OPT_RECONNECT_DELAY = 1 << 9,
 	NVMF_OPT_HOST_TRADDR	= 1 << 10,
+	NVMF_OPT_CTRL_LOSS_TMO	= 1 << 11,
 };
 
 /**
@@ -77,6 +80,10 @@ enum {
  * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
  * @kato:	Keep-alive timeout.
  * @host:	Virtual NVMe host, contains the NQN and Host ID.
+ * @nr_reconnects: number of reconnect attempted since the last ctrl failure
+ * @max_reconnects: maximum number of allowed reconnect attempts before removing
+ *              the controller, (-1) means reconnect forever, zero means remove
+ *              immediately;
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -91,6 +98,8 @@ struct nvmf_ctrl_options {
 	bool			discovery_nqn;
 	unsigned int		kato;
 	struct nvmf_host	*host;
+	int			nr_reconnects;
+	int			max_reconnects;
 };
 
 /*
@@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 
 #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d996ca73d3be..4976db56e351 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -19,6 +19,7 @@
 #include <linux/parser.h>
 #include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
+#include <linux/delay.h>
 
 #include "nvme.h"
 #include "fabrics.h"
@@ -44,6 +45,8 @@ enum nvme_fc_queue_flags {
 
 #define NVMEFC_QUEUE_DELAY	3		/* ms units */
 
+#define NVME_FC_MAX_CONNECT_ATTEMPTS	1
+
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
 	struct device		*dev;
@@ -61,16 +64,24 @@ struct nvme_fc_queue {
 	unsigned long		flags;
 } __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
 
+enum nvme_fcop_flags {
+	FCOP_FLAGS_TERMIO	= (1 << 0),
+	FCOP_FLAGS_RELEASED	= (1 << 1),
+	FCOP_FLAGS_COMPLETE	= (1 << 2),
+	FCOP_FLAGS_AEN		= (1 << 3),
+};
+
 struct nvmefc_ls_req_op {
 	struct nvmefc_ls_req	ls_req;
 
-	struct nvme_fc_ctrl	*ctrl;
+	struct nvme_fc_rport	*rport;
 	struct nvme_fc_queue	*queue;
 	struct request		*rq;
+	u32			flags;
 
 	int			ls_error;
 	struct completion	ls_done;
-	struct list_head	lsreq_list;	/* ctrl->ls_req_list */
+	struct list_head	lsreq_list;	/* rport->ls_req_list */
 	bool			req_queued;
 };
 
@@ -79,6 +90,7 @@ enum nvme_fcpop_state {
 	FCPOP_STATE_IDLE	= 1,
 	FCPOP_STATE_ACTIVE	= 2,
 	FCPOP_STATE_ABORTED	= 3,
+	FCPOP_STATE_COMPLETE	= 4,
 };
 
 struct nvme_fc_fcp_op {
@@ -97,6 +109,7 @@ struct nvme_fc_fcp_op {
 	struct request		*rq;
 
 	atomic_t		state;
+	u32			flags;
 	u32			rqno;
 	u32			nents;
 
@@ -120,23 +133,24 @@ struct nvme_fc_rport {
 
 	struct list_head		endp_list; /* for lport->endp_list */
 	struct list_head		ctrl_list;
+	struct list_head		ls_req_list;
+	struct device			*dev;	/* physical device for dma */
+	struct nvme_fc_lport		*lport;
 	spinlock_t			lock;
 	struct kref			ref;
 } __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
 
-enum nvme_fcctrl_state {
-	FCCTRL_INIT		= 0,
-	FCCTRL_ACTIVE		= 1,
+enum nvme_fcctrl_flags {
+	FCCTRL_TERMIO		= (1 << 0),
 };
 
 struct nvme_fc_ctrl {
 	spinlock_t		lock;
 	struct nvme_fc_queue	*queues;
-	u32			queue_count;
-
 	struct device		*dev;
 	struct nvme_fc_lport	*lport;
 	struct nvme_fc_rport	*rport;
+	u32			queue_count;
 	u32			cnum;
 
 	u64			association_id;
@@ -144,14 +158,19 @@ struct nvme_fc_ctrl {
 	u64			cap;
 
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
-	struct list_head	ls_req_list;
 
 	struct blk_mq_tag_set	admin_tag_set;
 	struct blk_mq_tag_set	tag_set;
 
 	struct work_struct	delete_work;
+	struct work_struct	reset_work;
+	struct delayed_work	connect_work;
+	int			reconnect_delay;
+	int			connect_attempts;
+
 	struct kref		ref;
-	int			state;
+	u32			flags;
+	u32			iocnt;
 
 	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
 
@@ -419,9 +438,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 
 	INIT_LIST_HEAD(&newrec->endp_list);
 	INIT_LIST_HEAD(&newrec->ctrl_list);
+	INIT_LIST_HEAD(&newrec->ls_req_list);
 	kref_init(&newrec->ref);
 	spin_lock_init(&newrec->lock);
 	newrec->remoteport.localport = &lport->localport;
+	newrec->dev = lport->dev;
+	newrec->lport = lport;
 	newrec->remoteport.private = &newrec[1];
 	newrec->remoteport.port_role = pinfo->port_role;
 	newrec->remoteport.node_name = pinfo->node_name;
@@ -444,7 +466,6 @@ out_kfree_rport:
 out_reghost_failed:
 	*portptr = NULL;
 	return ret;
-
 }
 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
 
@@ -487,6 +508,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
 	return kref_get_unless_zero(&rport->ref);
 }
 
+static int
+nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
+{
+	struct nvmefc_ls_req_op *lsop;
+	unsigned long flags;
+
+restart:
+	spin_lock_irqsave(&rport->lock, flags);
+
+	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
+		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
+			lsop->flags |= FCOP_FLAGS_TERMIO;
+			spin_unlock_irqrestore(&rport->lock, flags);
+			rport->lport->ops->ls_abort(&rport->lport->localport,
+						&rport->remoteport,
+						&lsop->ls_req);
+			goto restart;
+		}
+	}
+	spin_unlock_irqrestore(&rport->lock, flags);
+
+	return 0;
+}
+
 /**
  * nvme_fc_unregister_remoteport - transport entry point called by an
  *                              LLDD to deregister/remove a previously
@@ -522,6 +567,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
 
 	spin_unlock_irqrestore(&rport->lock, flags);
 
+	nvme_fc_abort_lsops(rport);
+
 	nvme_fc_rport_put(rport);
 	return 0;
 }
@@ -624,16 +671,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
 
 
 static void
-__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl,
-		struct nvmefc_ls_req_op *lsop)
+__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
 {
+	struct nvme_fc_rport *rport = lsop->rport;
 	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
+	spin_lock_irqsave(&rport->lock, flags);
 
 	if (!lsop->req_queued) {
-		spin_unlock_irqrestore(&ctrl->lock, flags);
+		spin_unlock_irqrestore(&rport->lock, flags);
 		return;
 	}
 
@@ -641,56 +688,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl,
 
 	lsop->req_queued = false;
 
-	spin_unlock_irqrestore(&ctrl->lock, flags);
+	spin_unlock_irqrestore(&rport->lock, flags);
 
-	fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma,
+	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
 				  (lsreq->rqstlen + lsreq->rsplen),
 				  DMA_BIDIRECTIONAL);
 
-	nvme_fc_ctrl_put(ctrl);
+	nvme_fc_rport_put(rport);
 }
 
 static int
-__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl,
+__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
 		struct nvmefc_ls_req_op *lsop,
 		void (*done)(struct nvmefc_ls_req *req, int status))
 {
 	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
 	unsigned long flags;
-	int ret;
+	int ret = 0;
 
-	if (!nvme_fc_ctrl_get(ctrl))
+	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+		return -ECONNREFUSED;
+
+	if (!nvme_fc_rport_get(rport))
 		return -ESHUTDOWN;
 
 	lsreq->done = done;
-	lsop->ctrl = ctrl;
+	lsop->rport = rport;
 	lsop->req_queued = false;
 	INIT_LIST_HEAD(&lsop->lsreq_list);
 	init_completion(&lsop->ls_done);
 
-	lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr,
+	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
 				  lsreq->rqstlen + lsreq->rsplen,
 				  DMA_BIDIRECTIONAL);
-	if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) {
-		nvme_fc_ctrl_put(ctrl);
-		dev_err(ctrl->dev,
-			"els request command failed EFAULT.\n");
-		return -EFAULT;
+	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
+		ret = -EFAULT;
+		goto out_putrport;
 	}
 	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
+	spin_lock_irqsave(&rport->lock, flags);
 
-	list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list);
+	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);
 
 	lsop->req_queued = true;
 
-	spin_unlock_irqrestore(&ctrl->lock, flags);
+	spin_unlock_irqrestore(&rport->lock, flags);
 
-	ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport,
-					&ctrl->rport->remoteport, lsreq);
+	ret = rport->lport->ops->ls_req(&rport->lport->localport,
+					&rport->remoteport, lsreq);
 	if (ret)
-		lsop->ls_error = ret;
+		goto out_unlink;
+
+	return 0;
+
+out_unlink:
+	lsop->ls_error = ret;
+	spin_lock_irqsave(&rport->lock, flags);
+	lsop->req_queued = false;
+	list_del(&lsop->lsreq_list);
+	spin_unlock_irqrestore(&rport->lock, flags);
+	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
+				  (lsreq->rqstlen + lsreq->rsplen),
+				  DMA_BIDIRECTIONAL);
+out_putrport:
+	nvme_fc_rport_put(rport);
 
 	return ret;
 }
@@ -705,15 +767,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
 }
 
 static int
-nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
+nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
 {
 	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
 	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
 	int ret;
 
-	ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done);
+	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);
 
-	if (!ret)
+	if (!ret) {
 		/*
 		 * No timeout/not interruptible as we need the struct
 		 * to exist until the lldd calls us back. Thus mandate
@@ -722,14 +784,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
 		 */
 		wait_for_completion(&lsop->ls_done);
 
-	__nvme_fc_finish_ls_req(ctrl, lsop);
+		__nvme_fc_finish_ls_req(lsop);
 
-	if (ret) {
-		dev_err(ctrl->dev,
-			"ls request command failed (%d).\n", ret);
-		return ret;
+		ret = lsop->ls_error;
 	}
 
+	if (ret)
+		return ret;
+
 	/* ACC or RJT payload ? */
 	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
 		return -ENXIO;
@@ -737,19 +799,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop)
 	return 0;
 }
 
-static void
-nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl,
+static int
+nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
 		struct nvmefc_ls_req_op *lsop,
 		void (*done)(struct nvmefc_ls_req *req, int status))
 {
-	int ret;
-
-	ret = __nvme_fc_send_ls_req(ctrl, lsop, done);
-
 	/* don't wait for completion */
 
-	if (ret)
-		done(&lsop->ls_req, ret);
+	return __nvme_fc_send_ls_req(rport, lsop, done);
 }
 
 /* Validation Error indexes into the string table below */
@@ -839,7 +896,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 	lsreq->rsplen = sizeof(*assoc_acc);
 	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
 
-	ret = nvme_fc_send_ls_req(ctrl, lsop);
+	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
 	if (ret)
 		goto out_free_buffer;
 
@@ -848,11 +905,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 	/* validate the ACC response */
 	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
 		fcret = VERR_LSACC;
-	if (assoc_acc->hdr.desc_list_len !=
+	else if (assoc_acc->hdr.desc_list_len !=
 			fcnvme_lsdesc_len(
 				sizeof(struct fcnvme_ls_cr_assoc_acc)))
 		fcret = VERR_CR_ASSOC_ACC_LEN;
-	if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
+	else if (assoc_acc->hdr.rqst.desc_tag !=
+			cpu_to_be32(FCNVME_LSDESC_RQST))
 		fcret = VERR_LSDESC_RQST;
 	else if (assoc_acc->hdr.rqst.desc_len !=
 			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
@@ -946,7 +1004,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	lsreq->rsplen = sizeof(*conn_acc);
 	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
 
-	ret = nvme_fc_send_ls_req(ctrl, lsop);
+	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
 	if (ret)
 		goto out_free_buffer;
 
@@ -955,10 +1013,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	/* validate the ACC response */
 	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
 		fcret = VERR_LSACC;
-	if (conn_acc->hdr.desc_list_len !=
+	else if (conn_acc->hdr.desc_list_len !=
 			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
 		fcret = VERR_CR_CONN_ACC_LEN;
-	if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
+	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
 		fcret = VERR_LSDESC_RQST;
 	else if (conn_acc->hdr.rqst.desc_len !=
 			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
@@ -997,14 +1055,8 @@ static void
 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
 {
 	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);
-	struct nvme_fc_ctrl *ctrl = lsop->ctrl;
 
-	__nvme_fc_finish_ls_req(ctrl, lsop);
-
-	if (status)
-		dev_err(ctrl->dev,
-			"disconnect assoc ls request command failed (%d).\n",
-			status);
+	__nvme_fc_finish_ls_req(lsop);
 
 	/* fc-nvme iniator doesn't care about success or failure of cmd */
 
@@ -1035,6 +1087,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 	struct fcnvme_ls_disconnect_acc *discon_acc;
 	struct nvmefc_ls_req_op *lsop;
 	struct nvmefc_ls_req *lsreq;
+	int ret;
 
 	lsop = kzalloc((sizeof(*lsop) +
 			 ctrl->lport->ops->lsrqst_priv_sz +
@@ -1077,7 +1130,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 	lsreq->rsplen = sizeof(*discon_acc);
 	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
 
-	nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done);
+	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
+				nvme_fc_disconnect_assoc_done);
+	if (ret)
+		kfree(lsop);
 
 	/* only meaningful part to terminating the association */
 	ctrl->association_id = 0;
@@ -1086,6 +1142,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 /* *********************** NVME Ctrl Routines **************************** */
 
+static void __nvme_fc_final_op_cleanup(struct request *rq);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1123,21 +1180,84 @@ nvme_fc_exit_request(void *data, struct request *rq,
 	return __nvme_fc_exit_request(data, op);
 }
 
+static int
+__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
+{
+	int state;
+
+	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+	if (state != FCPOP_STATE_ACTIVE) {
+		atomic_set(&op->state, state);
+		return -ECANCELED;
+	}
+
+	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
+					&ctrl->rport->remoteport,
+					op->queue->lldd_handle,
+					&op->fcp_req);
+
+	return 0;
+}
+
 static void
-nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl)
+nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-	int i;
+	unsigned long flags;
+	int i, ret;
 
 	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT)
+		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
 			continue;
-		__nvme_fc_exit_request(ctrl, aen_op);
-		nvme_fc_ctrl_put(ctrl);
+
+		spin_lock_irqsave(&ctrl->lock, flags);
+		if (ctrl->flags & FCCTRL_TERMIO) {
+			ctrl->iocnt++;
+			aen_op->flags |= FCOP_FLAGS_TERMIO;
+		}
+		spin_unlock_irqrestore(&ctrl->lock, flags);
+
+		ret = __nvme_fc_abort_op(ctrl, aen_op);
+		if (ret) {
+			/*
+			 * if __nvme_fc_abort_op failed the io wasn't
+			 * active. Thus this call path is running in
+			 * parallel to the io complete. Treat as non-error.
+			 */
+
+			/* back out the flags/counters */
+			spin_lock_irqsave(&ctrl->lock, flags);
+			if (ctrl->flags & FCCTRL_TERMIO)
+				ctrl->iocnt--;
+			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
+			spin_unlock_irqrestore(&ctrl->lock, flags);
+			return;
+		}
 	}
 }
 
-void
+static inline int
+__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
+		struct nvme_fc_fcp_op *op)
+{
+	unsigned long flags;
+	bool complete_rq = false;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+		if (ctrl->flags & FCCTRL_TERMIO)
+			ctrl->iocnt--;
+	}
+	if (op->flags & FCOP_FLAGS_RELEASED)
+		complete_rq = true;
+	else
+		op->flags |= FCOP_FLAGS_COMPLETE;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	return complete_rq;
+}
+
+static void
 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 {
 	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
@@ -1146,7 +1266,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 	struct nvme_fc_queue *queue = op->queue;
 	struct nvme_completion *cqe = &op->rsp_iu.cqe;
-	u16 status;
+	struct nvme_command *sqe = &op->cmd_iu.sqe;
+	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
+	union nvme_result result;
+	bool complete_rq;
 
 	/*
 	 * WARNING:
@@ -1181,9 +1304,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
 	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
-		status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
-	else
-		status = freq->status;
+		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	else if (freq->status)
+		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
 
 	/*
 	 * For the linux implementation, if we have an unsuccesful
@@ -1211,10 +1334,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		 */
 		if (freq->transferred_length !=
 			be32_to_cpu(op->cmd_iu.data_len)) {
-			status = -EIO;
+			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
 			goto done;
 		}
-		op->nreq.result.u64 = 0;
+		result.u64 = 0;
 		break;
 
 	case sizeof(struct nvme_fc_ersp_iu):
@@ -1226,28 +1349,40 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 					(freq->rcv_rsplen / 4) ||
 			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
 					freq->transferred_length ||
-			     op->rqno != le16_to_cpu(cqe->command_id))) {
-			status = -EIO;
+			     op->rsp_iu.status_code ||
+			     sqe->common.command_id != cqe->command_id)) {
+			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
 			goto done;
 		}
-		op->nreq.result = cqe->result;
-		status = le16_to_cpu(cqe->status) >> 1;
+		result = cqe->result;
+		status = cqe->status;
 		break;
 
 	default:
-		status = -EIO;
+		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
 		goto done;
 	}
 
 done:
-	if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) {
-		nvme_complete_async_event(&queue->ctrl->ctrl, status,
-					&op->nreq.result);
+	if (op->flags & FCOP_FLAGS_AEN) {
+		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
+		complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+		atomic_set(&op->state, FCPOP_STATE_IDLE);
+		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
 		return;
 	}
 
-	blk_mq_complete_request(rq, status);
+	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+	if (!complete_rq) {
+		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
+			status = cpu_to_le16(NVME_SC_ABORT_REQ);
+			if (blk_queue_dying(rq->q))
+				status |= cpu_to_le16(NVME_SC_DNR);
+		}
+		nvme_end_request(rq, status, result);
+	} else
+		__nvme_fc_final_op_cleanup(rq);
 }
 
 static int
@@ -1328,25 +1463,55 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
 	struct nvme_fc_fcp_op *aen_op;
 	struct nvme_fc_cmd_iu *cmdiu;
 	struct nvme_command *sqe;
+	void *private;
 	int i, ret;
 
 	aen_op = ctrl->aen_ops;
 	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
+						GFP_KERNEL);
+		if (!private)
+			return -ENOMEM;
+
 		cmdiu = &aen_op->cmd_iu;
 		sqe = &cmdiu->sqe;
 		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
 				aen_op, (struct request *)NULL,
 				(AEN_CMDID_BASE + i));
-		if (ret)
+		if (ret) {
+			kfree(private);
 			return ret;
+		}
+
+		aen_op->flags = FCOP_FLAGS_AEN;
+		aen_op->fcp_req.first_sgl = NULL; /* no sg list */
+		aen_op->fcp_req.private = private;
 
 		memset(sqe, 0, sizeof(*sqe));
 		sqe->common.opcode = nvme_admin_async_event;
+		/* Note: core layer may overwrite the sqe.command_id value */
 		sqe->common.command_id = AEN_CMDID_BASE + i;
 	}
 	return 0;
 }
 
+static void
+nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvme_fc_fcp_op *aen_op;
+	int i;
+
+	aen_op = ctrl->aen_ops;
+	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+		if (!aen_op->fcp_req.private)
+			continue;
+
+		__nvme_fc_exit_request(ctrl, aen_op);
+
+		kfree(aen_op->fcp_req.private);
+		aen_op->fcp_req.private = NULL;
+	}
+}
 
 static inline void
 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
@@ -1446,15 +1611,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
 }
 
 static void
-nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl)
-{
-	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
-	blk_mq_free_tag_set(&ctrl->admin_tag_set);
-	nvme_fc_free_queue(&ctrl->queues[0]);
-}
-
-static void
 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
@@ -1541,19 +1697,27 @@ nvme_fc_ctrl_free(struct kref *ref)
 		container_of(ref, struct nvme_fc_ctrl, ref);
 	unsigned long flags;
 
-	if (ctrl->state != FCCTRL_INIT) {
-		/* remove from rport list */
-		spin_lock_irqsave(&ctrl->rport->lock, flags);
-		list_del(&ctrl->ctrl_list);
-		spin_unlock_irqrestore(&ctrl->rport->lock, flags);
+	if (ctrl->ctrl.tagset) {
+		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_free_tag_set(&ctrl->tag_set);
 	}
 
+	/* remove from rport list */
+	spin_lock_irqsave(&ctrl->rport->lock, flags);
+	list_del(&ctrl->ctrl_list);
+	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
+
+	blk_cleanup_queue(ctrl->ctrl.admin_q);
+	blk_mq_free_tag_set(&ctrl->admin_tag_set);
+
+	kfree(ctrl->queues);
+
 	put_device(ctrl->dev);
 	nvme_fc_rport_put(ctrl->rport);
 
-	kfree(ctrl->queues);
 	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
-	nvmf_free_options(ctrl->ctrl.opts);
+	if (ctrl->ctrl.opts)
+		nvmf_free_options(ctrl->ctrl.opts);
 	kfree(ctrl);
 }
 
@@ -1574,57 +1738,38 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
  * controller. Called after last nvme_put_ctrl() call
  */
 static void
-nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl)
+nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
 	WARN_ON(nctrl != &ctrl->ctrl);
 
-	/*
-	 * Tear down the association, which will generate link
-	 * traffic to terminate connections
-	 */
-
-	if (ctrl->state != FCCTRL_INIT) {
-		/* send a Disconnect(association) LS to fc-nvme target */
-		nvme_fc_xmt_disconnect_assoc(ctrl);
-
-		if (ctrl->ctrl.tagset) {
-			blk_cleanup_queue(ctrl->ctrl.connect_q);
-			blk_mq_free_tag_set(&ctrl->tag_set);
-			nvme_fc_delete_hw_io_queues(ctrl);
-			nvme_fc_free_io_queues(ctrl);
-		}
-
-		nvme_fc_exit_aen_ops(ctrl);
-
-		nvme_fc_destroy_admin_queue(ctrl);
-	}
-
 	nvme_fc_ctrl_put(ctrl);
 }
 
-
-static int
-__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
+static void
+nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
-	int state;
+	dev_warn(ctrl->ctrl.device,
+		"NVME-FC{%d}: transport association error detected: %s\n",
+		ctrl->cnum, errmsg);
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
-	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-	if (state != FCPOP_STATE_ACTIVE) {
-		atomic_set(&op->state, state);
-		return -ECANCELED; /* fail */
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: error_recovery: Couldn't change state "
+			"to RECONNECTING\n", ctrl->cnum);
+		return;
 	}
 
-	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
-					&ctrl->rport->remoteport,
-					op->queue->lldd_handle,
-					&op->fcp_req);
-
-	return 0;
+	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: error_recovery: Failed to schedule "
+			"reset work\n", ctrl->cnum);
 }
 
-enum blk_eh_timer_return
+static enum blk_eh_timer_return
 nvme_fc_timeout(struct request *rq, bool reserved)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
@@ -1640,11 +1785,13 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 		return BLK_EH_HANDLED;
 
 	/*
-	 * TODO: force a controller reset
-	 *   when that happens, queues will be torn down and outstanding
-	 *   ios will be terminated, and the above abort, on a single io
-	 *   will no longer be needed.
+	 * we can't individually ABTS an io without affecting the queue,
+	 * thus killing the queue, adn thus the association.
+	 * So resolve by performing a controller reset, which will stop
+	 * the host/io stack, terminate the association on the link,
+	 * and recreate an association on the link.
 	 */
+	nvme_fc_error_recovery(ctrl, "io timeout error");
 
 	return BLK_EH_HANDLED;
 }
@@ -1738,6 +1885,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	u32 csn;
 	int ret;
 
+	/*
+	 * before attempting to send the io, check to see if we believe
+	 * the target device is present
+	 */
+	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+		return BLK_MQ_RQ_QUEUE_ERROR;
+
 	if (!nvme_fc_ctrl_get(ctrl))
 		return BLK_MQ_RQ_QUEUE_ERROR;
 
@@ -1761,7 +1915,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	op->fcp_req.io_dir = io_dir;
 	op->fcp_req.transferred_length = 0;
 	op->fcp_req.rcv_rsplen = 0;
-	op->fcp_req.status = 0;
+	op->fcp_req.status = NVME_SC_SUCCESS;
 	op->fcp_req.sqid = cpu_to_le16(queue->qnum);
 
 	/*
@@ -1782,14 +1936,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
 	sqe->rw.dptr.sgl.addr = 0;
 
-	/* odd that we set the command_id - should come from nvme-fabrics */
-	WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno));
-
-	if (op->rq) {				/* skipped on aens */
+	if (!(op->flags & FCOP_FLAGS_AEN)) {
 		ret = nvme_fc_map_data(ctrl, op->rq, op);
 		if (ret < 0) {
-			dev_err(queue->ctrl->ctrl.device,
-			     "Failed to map data (%d)\n", ret);
 			nvme_cleanup_cmd(op->rq);
 			nvme_fc_ctrl_put(ctrl);
 			return (ret == -ENOMEM || ret == -EAGAIN) ?
@@ -1802,7 +1951,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 
 	atomic_set(&op->state, FCPOP_STATE_ACTIVE);
 
-	if (op->rq)
+	if (!(op->flags & FCOP_FLAGS_AEN))
 		blk_mq_start_request(op->rq);
 
 	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
@@ -1810,9 +1959,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
-		dev_err(ctrl->dev,
-			"Send nvme command failed - lldd returned %d.\n", ret);
-
 		if (op->rq) {			/* normal request */
 			nvme_fc_unmap_data(ctrl, op->rq, op);
 			nvme_cleanup_cmd(op->rq);
@@ -1882,12 +2028,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	struct nvme_fc_fcp_op *op;
 
 	req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
-	if (!req) {
-		dev_err(queue->ctrl->ctrl.device,
-			 "tag 0x%x on QNum %#x not found\n",
-			tag, queue->qnum);
+	if (!req)
 		return 0;
-	}
 
 	op = blk_mq_rq_to_pdu(req);
 
@@ -1904,11 +2046,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
 	struct nvme_fc_fcp_op *aen_op;
+	unsigned long flags;
+	bool terminating = false;
 	int ret;
 
 	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
 		return;
 
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (ctrl->flags & FCCTRL_TERMIO)
+		terminating = true;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	if (terminating)
+		return;
+
 	aen_op = &ctrl->aen_ops[aer_idx];
 
 	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
@@ -1919,36 +2071,101 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 }
 
 static void
-nvme_fc_complete_rq(struct request *rq)
+__nvme_fc_final_op_cleanup(struct request *rq)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
-	int error = 0, state;
 
-	state = atomic_xchg(&op->state, FCPOP_STATE_IDLE);
+	atomic_set(&op->state, FCPOP_STATE_IDLE);
+	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
+			FCOP_FLAGS_COMPLETE);
 
 	nvme_cleanup_cmd(rq);
-
 	nvme_fc_unmap_data(ctrl, rq, op);
+	nvme_complete_rq(rq);
+	nvme_fc_ctrl_put(ctrl);
 
-	if (unlikely(rq->errors)) {
-		if (nvme_req_needs_retry(rq, rq->errors)) {
-			nvme_requeue_req(rq);
-			return;
-		}
+}
 
-		if (blk_rq_is_passthrough(rq))
-			error = rq->errors;
-		else
-			error = nvme_error_status(rq->errors);
+static void
+nvme_fc_complete_rq(struct request *rq)
+{
+	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
+	struct nvme_fc_ctrl *ctrl = op->ctrl;
+	unsigned long flags;
+	bool completed = false;
+
+	/*
+	 * the core layer, on controller resets after calling
+	 * nvme_shutdown_ctrl(), calls complete_rq without our
+	 * calling blk_mq_complete_request(), thus there may still
+	 * be live i/o outstanding with the LLDD. Means transport has
+	 * to track complete calls vs fcpio_done calls to know what
+	 * path to take on completes and dones.
+	 */
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (op->flags & FCOP_FLAGS_COMPLETE)
+		completed = true;
+	else
+		op->flags |= FCOP_FLAGS_RELEASED;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	if (completed)
+		__nvme_fc_final_op_cleanup(rq);
+}
+
+/*
+ * This routine is used by the transport when it needs to find active
+ * io on a queue that is to be terminated. The transport uses
+ * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
+ * this routine to kill them on a 1 by 1 basis.
+ *
+ * As FC allocates FC exchange for each io, the transport must contact
+ * the LLDD to terminate the exchange, thus releasing the FC exchange.
+ * After terminating the exchange the LLDD will call the transport's
+ * normal io done path for the request, but it will have an aborted
+ * status. The done path will return the io request back to the block
+ * layer with an error status.
+ */
+static void
+nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+{
+	struct nvme_ctrl *nctrl = data;
+	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
+	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
+	unsigned long flags;
+	int status;
+
+	if (!blk_mq_request_started(req))
+		return;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (ctrl->flags & FCCTRL_TERMIO) {
+		ctrl->iocnt++;
+		op->flags |= FCOP_FLAGS_TERMIO;
 	}
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	nvme_fc_ctrl_put(ctrl);
+	status = __nvme_fc_abort_op(ctrl, op);
+	if (status) {
+		/*
+		 * if __nvme_fc_abort_op failed the io wasn't
+		 * active. Thus this call path is running in
+		 * parallel to the io complete. Treat as non-error.
+		 */
 
-	blk_mq_end_request(rq, error);
+		/* back out the flags/counters */
+		spin_lock_irqsave(&ctrl->lock, flags);
+		if (ctrl->flags & FCCTRL_TERMIO)
+			ctrl->iocnt--;
+		op->flags &= ~FCOP_FLAGS_TERMIO;
+		spin_unlock_irqrestore(&ctrl->lock, flags);
+		return;
+	}
 }
 
-static struct blk_mq_ops nvme_fc_mq_ops = {
+
+static const struct blk_mq_ops nvme_fc_mq_ops = {
 	.queue_rq	= nvme_fc_queue_rq,
 	.complete	= nvme_fc_complete_rq,
 	.init_request	= nvme_fc_init_request,
@@ -1959,145 +2176,275 @@ static struct blk_mq_ops nvme_fc_mq_ops = {
 	.timeout	= nvme_fc_timeout,
 };
 
-static struct blk_mq_ops nvme_fc_admin_mq_ops = {
-	.queue_rq	= nvme_fc_queue_rq,
-	.complete	= nvme_fc_complete_rq,
-	.init_request	= nvme_fc_init_admin_request,
-	.exit_request	= nvme_fc_exit_request,
-	.reinit_request	= nvme_fc_reinit_request,
-	.init_hctx	= nvme_fc_init_admin_hctx,
-	.timeout	= nvme_fc_timeout,
-};
-
 static int
-nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl)
+nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 {
-	u32 segs;
-	int error;
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	int ret;
 
-	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	if (ret) {
+		dev_info(ctrl->ctrl.device,
+			"set_queue_count failed: %d\n", ret);
+		return ret;
+	}
 
-	error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-				NVME_FC_AQ_BLKMQ_DEPTH,
-				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
-	if (error)
-		return error;
+	ctrl->queue_count = opts->nr_io_queues + 1;
+	if (!opts->nr_io_queues)
+		return 0;
 
-	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
-	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
-	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
-	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
-	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
+	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
+			opts->nr_io_queues);
+
+	nvme_fc_init_io_queues(ctrl);
+
+	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
+	ctrl->tag_set.ops = &nvme_fc_mq_ops;
+	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
+	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+	ctrl->tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
 					(SG_CHUNK_SIZE *
 						sizeof(struct scatterlist)) +
 					ctrl->lport->ops->fcprqst_priv_sz;
-	ctrl->admin_tag_set.driver_data = ctrl;
-	ctrl->admin_tag_set.nr_hw_queues = 1;
-	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+	ctrl->tag_set.driver_data = ctrl;
+	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
-	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
-	if (error)
-		goto out_free_queue;
+	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
+	if (ret)
+		return ret;
 
-	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
-	if (IS_ERR(ctrl->ctrl.admin_q)) {
-		error = PTR_ERR(ctrl->ctrl.admin_q);
-		goto out_free_tagset;
+	ctrl->ctrl.tagset = &ctrl->tag_set;
+
+	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
+	if (IS_ERR(ctrl->ctrl.connect_q)) {
+		ret = PTR_ERR(ctrl->ctrl.connect_q);
+		goto out_free_tag_set;
+	}
+
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	if (ret)
+		goto out_cleanup_blk_queue;
+
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	if (ret)
+		goto out_delete_hw_queues;
+
+	return 0;
+
+out_delete_hw_queues:
+	nvme_fc_delete_hw_io_queues(ctrl);
+out_cleanup_blk_queue:
+	nvme_stop_keep_alive(&ctrl->ctrl);
+	blk_cleanup_queue(ctrl->ctrl.connect_q);
+out_free_tag_set:
+	blk_mq_free_tag_set(&ctrl->tag_set);
+	nvme_fc_free_io_queues(ctrl);
+
+	/* force put free routine to ignore io queues */
+	ctrl->ctrl.tagset = NULL;
+
+	return ret;
+}
+
+static int
+nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	int ret;
+
+	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	if (ret) {
+		dev_info(ctrl->ctrl.device,
+			"set_queue_count failed: %d\n", ret);
+		return ret;
 	}
 
-	error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
+	/* check for io queues existing */
+	if (ctrl->queue_count == 1)
+		return 0;
+
+	dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
+			opts->nr_io_queues);
+
+	nvme_fc_init_io_queues(ctrl);
+
+	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+	if (ret)
+		goto out_free_io_queues;
+
+	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	if (ret)
+		goto out_free_io_queues;
+
+	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+	if (ret)
+		goto out_delete_hw_queues;
+
+	return 0;
+
+out_delete_hw_queues:
+	nvme_fc_delete_hw_io_queues(ctrl);
+out_free_io_queues:
+	nvme_fc_free_io_queues(ctrl);
+	return ret;
+}
+
+/*
+ * This routine restarts the controller on the host side, and
+ * on the link side, recreates the controller association.
+ */
+static int
+nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	u32 segs;
+	int ret;
+	bool changed;
+
+	ctrl->connect_attempts++;
+
+	/*
+	 * Create the admin queue
+	 */
+
+	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+
+	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
 				NVME_FC_AQ_BLKMQ_DEPTH);
-	if (error)
-		goto out_cleanup_queue;
+	if (ret)
+		goto out_free_queue;
 
-	error = nvmf_connect_admin_queue(&ctrl->ctrl);
-	if (error)
+	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
+				NVME_FC_AQ_BLKMQ_DEPTH,
+				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
+	if (ret)
 		goto out_delete_hw_queue;
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
-	if (error) {
+	if (ctrl->ctrl.state != NVME_CTRL_NEW)
+		blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+
+	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
+	if (ret)
+		goto out_disconnect_admin_queue;
+
+	/*
+	 * Check controller capabilities
+	 *
+	 * todo:- add code to check if ctrl attributes changed from
+	 * prior connection values
+	 */
+
+	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	if (ret) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
-		goto out_delete_hw_queue;
+		goto out_disconnect_admin_queue;
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
-	if (error)
-		goto out_delete_hw_queue;
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	if (ret)
+		goto out_disconnect_admin_queue;
 
 	segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
 			ctrl->lport->ops->max_sgl_segments);
 	ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
 
-	error = nvme_init_identify(&ctrl->ctrl);
-	if (error)
-		goto out_delete_hw_queue;
+	ret = nvme_init_identify(&ctrl->ctrl);
+	if (ret)
+		goto out_disconnect_admin_queue;
+
+	/* sanity checks */
+
+	/* FC-NVME does not have other data in the capsule */
+	if (ctrl->ctrl.icdoff) {
+		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
+				ctrl->ctrl.icdoff);
+		goto out_disconnect_admin_queue;
+	}
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
-	return 0;
+	/* FC-NVME supports normal SGL Data Block Descriptors */
 
+	if (opts->queue_size > ctrl->ctrl.maxcmd) {
+		/* warn if maxcmd is lower than queue_size */
+		dev_warn(ctrl->ctrl.device,
+			"queue_size %zu > ctrl maxcmd %u, reducing "
+			"to queue_size\n",
+			opts->queue_size, ctrl->ctrl.maxcmd);
+		opts->queue_size = ctrl->ctrl.maxcmd;
+	}
+
+	ret = nvme_fc_init_aen_ops(ctrl);
+	if (ret)
+		goto out_term_aen_ops;
+
+	/*
+	 * Create the io queues
+	 */
+
+	if (ctrl->queue_count > 1) {
+		if (ctrl->ctrl.state == NVME_CTRL_NEW)
+			ret = nvme_fc_create_io_queues(ctrl);
+		else
+			ret = nvme_fc_reinit_io_queues(ctrl);
+		if (ret)
+			goto out_term_aen_ops;
+	}
+
+	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+	WARN_ON_ONCE(!changed);
+
+	ctrl->connect_attempts = 0;
+
+	kref_get(&ctrl->ctrl.kref);
+
+	if (ctrl->queue_count > 1) {
+		nvme_start_queues(&ctrl->ctrl);
+		nvme_queue_scan(&ctrl->ctrl);
+		nvme_queue_async_events(&ctrl->ctrl);
+	}
+
+	return 0;	/* Success */
+
+out_term_aen_ops:
+	nvme_fc_term_aen_ops(ctrl);
+	nvme_stop_keep_alive(&ctrl->ctrl);
+out_disconnect_admin_queue:
+	/* send a Disconnect(association) LS to fc-nvme target */
+	nvme_fc_xmt_disconnect_assoc(ctrl);
 out_delete_hw_queue:
 	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
-out_cleanup_queue:
-	blk_cleanup_queue(ctrl->ctrl.admin_q);
-out_free_tagset:
-	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 out_free_queue:
 	nvme_fc_free_queue(&ctrl->queues[0]);
-	return error;
+
+	return ret;
 }
 
 /*
- * This routine is used by the transport when it needs to find active
- * io on a queue that is to be terminated. The transport uses
- * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
- * this routine to kill them on a 1 by 1 basis.
- *
- * As FC allocates FC exchange for each io, the transport must contact
- * the LLDD to terminate the exchange, thus releasing the FC exchange.
- * After terminating the exchange the LLDD will call the transport's
- * normal io done path for the request, but it will have an aborted
- * status. The done path will return the io request back to the block
- * layer with an error status.
+ * This routine stops operation of the controller on the host side.
+ * On the host os stack side: Admin and IO queues are stopped,
+ *   outstanding ios on them terminated via FC ABTS.
+ * On the link side: the association is terminated.
  */
 static void
-nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_ctrl *nctrl = data;
-	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-int status;
-
-	if (!blk_mq_request_started(req))
-		return;
+	unsigned long flags;
 
-	/* this performs an ABTS-LS on the FC exchange for the io */
-	status = __nvme_fc_abort_op(ctrl, op);
-	/*
-	 * if __nvme_fc_abort_op failed: io wasn't active to abort
-	 * consider it done. Assume completion path already completing
-	 * in parallel
-	 */
-	if (status)
-		/* io wasn't active to abort consider it done */
-		/* assume completion path already completing in parallel */
-		return;
-}
+	nvme_stop_keep_alive(&ctrl->ctrl);
 
+	spin_lock_irqsave(&ctrl->lock, flags);
+	ctrl->flags |= FCCTRL_TERMIO;
+	ctrl->iocnt = 0;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-/*
- * This routine stops operation of the controller. Admin and IO queues
- * are stopped, outstanding ios on them terminated, and the nvme ctrl
- * is shutdown.
- */
-static void
-nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl)
-{
 	/*
 	 * If io queues are present, stop them and terminate all outstanding
 	 * ios on them. As FC allocates FC exchange for each io, the
@@ -2116,35 +2463,79 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl)
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 	}
 
-	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
-		nvme_shutdown_ctrl(&ctrl->ctrl);
+	/*
+	 * Other transports, which don't have link-level contexts bound
+	 * to sqe's, would try to gracefully shutdown the controller by
+	 * writing the registers for shutdown and polling (call
+	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
+	 * just aborted and we will wait on those contexts, and given
+	 * there was no indication of how live the controlelr is on the
+	 * link, don't send more io to create more contexts for the
+	 * shutdown. Let the controller fail via keepalive failure if
+	 * its still present.
+	 */
 
 	/*
-	 * now clean up the admin queue. Same thing as above.
+	 * clean up the admin queue. Same thing as above.
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
+
+	/* kill the aens as they are a separate path */
+	nvme_fc_abort_aen_ops(ctrl);
+
+	/* wait for all io that had to be aborted */
+	spin_lock_irqsave(&ctrl->lock, flags);
+	while (ctrl->iocnt) {
+		spin_unlock_irqrestore(&ctrl->lock, flags);
+		msleep(1000);
+		spin_lock_irqsave(&ctrl->lock, flags);
+	}
+	ctrl->flags &= ~FCCTRL_TERMIO;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	nvme_fc_term_aen_ops(ctrl);
+
+	/*
+	 * send a Disconnect(association) LS to fc-nvme target
+	 * Note: could have been sent at top of process, but
+	 * cleaner on link traffic if after the aborts complete.
+	 * Note: if association doesn't exist, association_id will be 0
+	 */
+	if (ctrl->association_id)
+		nvme_fc_xmt_disconnect_assoc(ctrl);
+
+	if (ctrl->ctrl.tagset) {
+		nvme_fc_delete_hw_io_queues(ctrl);
+		nvme_fc_free_io_queues(ctrl);
+	}
+
+	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
+	nvme_fc_free_queue(&ctrl->queues[0]);
 }
 
-/*
- * Called to teardown an association.
- * May be called with association fully in place or partially in place.
- */
 static void
-__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl)
+nvme_fc_delete_ctrl_work(struct work_struct *work)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
+	struct nvme_fc_ctrl *ctrl =
+		container_of(work, struct nvme_fc_ctrl, delete_work);
 
-	/* stop and terminate ios on admin and io queues */
-	nvme_fc_shutdown_ctrl(ctrl);
+	cancel_work_sync(&ctrl->reset_work);
+	cancel_delayed_work_sync(&ctrl->connect_work);
+
+	/*
+	 * kill the association on the link side.  this will block
+	 * waiting for io to terminate
+	 */
+	nvme_fc_delete_association(ctrl);
 
 	/*
 	 * tear down the controller
 	 * This will result in the last reference on the nvme ctrl to
-	 * expire, calling the transport nvme_fc_free_nvme_ctrl() callback.
+	 * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
 	 * From there, the transport will tear down it's logical queues and
 	 * association.
 	 */
@@ -2153,15 +2544,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl)
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static void
-nvme_fc_del_ctrl_work(struct work_struct *work)
-{
-	struct nvme_fc_ctrl *ctrl =
-			container_of(work, struct nvme_fc_ctrl, delete_work);
-
-	__nvme_fc_remove_ctrl(ctrl);
-}
-
 static int
 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
 {
@@ -2181,25 +2563,85 @@ static int
 nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-	struct nvme_fc_rport *rport = ctrl->rport;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&rport->lock, flags);
+	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+		return -EBUSY;
+
 	ret = __nvme_fc_del_ctrl(ctrl);
-	spin_unlock_irqrestore(&rport->lock, flags);
-	if (ret)
-		return ret;
 
-	flush_work(&ctrl->delete_work);
+	if (!ret)
+		flush_workqueue(nvme_fc_wq);
 
-	return 0;
+	nvme_put_ctrl(&ctrl->ctrl);
+
+	return ret;
+}
+
+static void
+nvme_fc_reset_ctrl_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+			container_of(work, struct nvme_fc_ctrl, reset_work);
+	int ret;
+
+	/* will block will waiting for io to terminate */
+	nvme_fc_delete_association(ctrl);
+
+	ret = nvme_fc_create_association(ctrl);
+	if (ret) {
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+			ctrl->cnum, ret);
+		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: Max reconnect attempts (%d) "
+				"reached. Removing controller\n",
+				ctrl->cnum, ctrl->connect_attempts);
+
+			if (!nvme_change_ctrl_state(&ctrl->ctrl,
+				NVME_CTRL_DELETING)) {
+				dev_err(ctrl->ctrl.device,
+					"NVME-FC{%d}: failed to change state "
+					"to DELETING\n", ctrl->cnum);
+				return;
+			}
+
+			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
+			return;
+		}
+
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+			ctrl->cnum, ctrl->reconnect_delay);
+		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+				ctrl->reconnect_delay * HZ);
+	} else
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
 }
 
+/*
+ * called by the nvme core layer, for sysfs interface that requests
+ * a reset of the nvme controller
+ */
 static int
 nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
 {
-	return -EIO;
+	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
+
+	dev_warn(ctrl->ctrl.device,
+		"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
+
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
+
+	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
+		return -EBUSY;
+
+	flush_work(&ctrl->reset_work);
+
+	return 0;
 }
 
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
@@ -2210,95 +2652,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
 	.reset_ctrl		= nvme_fc_reset_nvme_ctrl,
-	.free_ctrl		= nvme_fc_free_nvme_ctrl,
+	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
 	.submit_async_event	= nvme_fc_submit_async_event,
 	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
 	.get_subsysnqn		= nvmf_get_subsysnqn,
 	.get_address		= nvmf_get_address,
 };
 
-static int
-nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
+static void
+nvme_fc_connect_ctrl_work(struct work_struct *work)
 {
-	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
-	if (ret) {
-		dev_info(ctrl->ctrl.device,
-			"set_queue_count failed: %d\n", ret);
-		return ret;
-	}
-
-	ctrl->queue_count = opts->nr_io_queues + 1;
-	if (!opts->nr_io_queues)
-		return 0;
-
-	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-			opts->nr_io_queues);
-
-	nvme_fc_init_io_queues(ctrl);
-
-	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
-	ctrl->tag_set.ops = &nvme_fc_mq_ops;
-	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-	ctrl->tag_set.numa_node = NUMA_NO_NODE;
-	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
-					(SG_CHUNK_SIZE *
-						sizeof(struct scatterlist)) +
-					ctrl->lport->ops->fcprqst_priv_sz;
-	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
-	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
-
-	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
-	if (ret)
-		return ret;
-
-	ctrl->ctrl.tagset = &ctrl->tag_set;
-
-	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
-	if (IS_ERR(ctrl->ctrl.connect_q)) {
-		ret = PTR_ERR(ctrl->ctrl.connect_q);
-		goto out_free_tag_set;
-	}
-
-	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
-	if (ret)
-		goto out_cleanup_blk_queue;
+	struct nvme_fc_ctrl *ctrl =
+			container_of(to_delayed_work(work),
+				struct nvme_fc_ctrl, connect_work);
 
-	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
-	if (ret)
-		goto out_delete_hw_queues;
+	ret = nvme_fc_create_association(ctrl);
+	if (ret) {
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
+			ctrl->cnum, ret);
+		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
+			dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: Max reconnect attempts (%d) "
+				"reached. Removing controller\n",
+				ctrl->cnum, ctrl->connect_attempts);
+
+			if (!nvme_change_ctrl_state(&ctrl->ctrl,
+				NVME_CTRL_DELETING)) {
+				dev_err(ctrl->ctrl.device,
+					"NVME-FC{%d}: failed to change state "
+					"to DELETING\n", ctrl->cnum);
+				return;
+			}
 
-	return 0;
+			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
+			return;
+		}
 
-out_delete_hw_queues:
-	nvme_fc_delete_hw_io_queues(ctrl);
-out_cleanup_blk_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
-	blk_cleanup_queue(ctrl->ctrl.connect_q);
-out_free_tag_set:
-	blk_mq_free_tag_set(&ctrl->tag_set);
-	nvme_fc_free_io_queues(ctrl);
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+			ctrl->cnum, ctrl->reconnect_delay);
+		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+				ctrl->reconnect_delay * HZ);
+	} else
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: controller reconnect complete\n",
+			ctrl->cnum);
+}
 
-	/* force put free routine to ignore io queues */
-	ctrl->ctrl.tagset = NULL;
 
-	return ret;
-}
+static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
+	.queue_rq	= nvme_fc_queue_rq,
+	.complete	= nvme_fc_complete_rq,
+	.init_request	= nvme_fc_init_admin_request,
+	.exit_request	= nvme_fc_exit_request,
+	.reinit_request	= nvme_fc_reinit_request,
+	.init_hctx	= nvme_fc_init_admin_hctx,
+	.timeout	= nvme_fc_timeout,
+};
 
 
 static struct nvme_ctrl *
-__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
 {
 	struct nvme_fc_ctrl *ctrl;
 	unsigned long flags;
 	int ret, idx;
-	bool changed;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl) {
@@ -2314,21 +2736,18 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.opts = opts;
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
-	INIT_LIST_HEAD(&ctrl->ls_req_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
 	ctrl->dev = lport->dev;
-	ctrl->state = FCCTRL_INIT;
 	ctrl->cnum = idx;
 
-	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
-	if (ret)
-		goto out_free_ida;
-
 	get_device(ctrl->dev);
 	kref_init(&ctrl->ref);
 
-	INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work);
+	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
+	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	ctrl->reconnect_delay = opts->reconnect_delay;
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -2345,87 +2764,87 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
 				GFP_KERNEL);
 	if (!ctrl->queues)
-		goto out_uninit_ctrl;
-
-	ret = nvme_fc_configure_admin_queue(ctrl);
-	if (ret)
-		goto out_uninit_ctrl;
-
-	/* sanity checks */
-
-	/* FC-NVME does not have other data in the capsule */
-	if (ctrl->ctrl.icdoff) {
-		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
-				ctrl->ctrl.icdoff);
-		goto out_remove_admin_queue;
-	}
-
-	/* FC-NVME supports normal SGL Data Block Descriptors */
+		goto out_free_ida;
 
-	if (opts->queue_size > ctrl->ctrl.maxcmd) {
-		/* warn if maxcmd is lower than queue_size */
-		dev_warn(ctrl->ctrl.device,
-			"queue_size %zu > ctrl maxcmd %u, reducing "
-			"to queue_size\n",
-			opts->queue_size, ctrl->ctrl.maxcmd);
-		opts->queue_size = ctrl->ctrl.maxcmd;
-	}
+	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
+	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
+	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
+	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
+	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
+					(SG_CHUNK_SIZE *
+						sizeof(struct scatterlist)) +
+					ctrl->lport->ops->fcprqst_priv_sz;
+	ctrl->admin_tag_set.driver_data = ctrl;
+	ctrl->admin_tag_set.nr_hw_queues = 1;
+	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
 
-	ret = nvme_fc_init_aen_ops(ctrl);
+	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
 	if (ret)
-		goto out_exit_aen_ops;
+		goto out_free_queues;
 
-	if (ctrl->queue_count > 1) {
-		ret = nvme_fc_create_io_queues(ctrl);
-		if (ret)
-			goto out_exit_aen_ops;
+	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+	if (IS_ERR(ctrl->ctrl.admin_q)) {
+		ret = PTR_ERR(ctrl->ctrl.admin_q);
+		goto out_free_admin_tag_set;
 	}
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	ctrl->state = FCCTRL_ACTIVE;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	/*
+	 * Would have been nice to init io queues tag set as well.
+	 * However, we require interaction from the controller
+	 * for max io queue count before we can do so.
+	 * Defer this to the connect path.
+	 */
 
-	dev_info(ctrl->ctrl.device,
-		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
-		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
+	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
+	if (ret)
+		goto out_cleanup_admin_q;
 
-	kref_get(&ctrl->ctrl.kref);
+	/* at this point, teardown path changes to ref counting on nvme ctrl */
 
 	spin_lock_irqsave(&rport->lock, flags);
 	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
 	spin_unlock_irqrestore(&rport->lock, flags);
 
-	if (opts->nr_io_queues) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
+	ret = nvme_fc_create_association(ctrl);
+	if (ret) {
+		ctrl->ctrl.opts = NULL;
+		/* initiate nvme ctrl ref counting teardown */
+		nvme_uninit_ctrl(&ctrl->ctrl);
+		nvme_put_ctrl(&ctrl->ctrl);
+
+		/* as we're past the point where we transition to the ref
+		 * counting teardown path, if we return a bad pointer here,
+		 * the calling routine, thinking it's prior to the
+		 * transition, will do an rport put. Since the teardown
+		 * path also does a rport put, we do an extra get here to
+		 * so proper order/teardown happens.
+		 */
+		nvme_fc_rport_get(rport);
+
+		if (ret > 0)
+			ret = -EIO;
+		return ERR_PTR(ret);
 	}
 
-	return &ctrl->ctrl;
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
+		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
 
-out_exit_aen_ops:
-	nvme_fc_exit_aen_ops(ctrl);
-out_remove_admin_queue:
-	/* send a Disconnect(association) LS to fc-nvme target */
-	nvme_fc_xmt_disconnect_assoc(ctrl);
-	nvme_stop_keep_alive(&ctrl->ctrl);
-	nvme_fc_destroy_admin_queue(ctrl);
-out_uninit_ctrl:
-	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
-	if (ret > 0)
-		ret = -EIO;
-	/* exit via here will follow ctlr ref point callbacks to free */
-	return ERR_PTR(ret);
+	return &ctrl->ctrl;
 
+out_cleanup_admin_q:
+	blk_cleanup_queue(ctrl->ctrl.admin_q);
+out_free_admin_tag_set:
+	blk_mq_free_tag_set(&ctrl->admin_tag_set);
+out_free_queues:
+	kfree(ctrl->queues);
 out_free_ida:
+	put_device(ctrl->dev);
 	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
 out_free_ctrl:
 	kfree(ctrl);
 out_fail:
-	nvme_fc_rport_put(rport);
 	/* exit via here doesn't follow ctlr ref points */
 	return ERR_PTR(ret);
 }
@@ -2497,6 +2916,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 {
 	struct nvme_fc_lport *lport;
 	struct nvme_fc_rport *rport;
+	struct nvme_ctrl *ctrl;
 	struct nvmet_fc_traddr laddr = { 0L, 0L };
 	struct nvmet_fc_traddr raddr = { 0L, 0L };
 	unsigned long flags;
@@ -2528,7 +2948,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 
 			spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
-			return __nvme_fc_create_ctrl(dev, opts, lport, rport);
+			ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
+			if (IS_ERR(ctrl))
+				nvme_fc_rport_put(rport);
+			return ctrl;
 		}
 	}
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
@@ -2546,11 +2969,20 @@ static struct nvmf_transport_ops nvme_fc_transport = {
 
 static int __init nvme_fc_init_module(void)
 {
+	int ret;
+
 	nvme_fc_wq = create_workqueue("nvme_fc_wq");
 	if (!nvme_fc_wq)
 		return -ENOMEM;
 
-	return nvmf_register_transport(&nvme_fc_transport);
+	ret = nvmf_register_transport(&nvme_fc_transport);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	destroy_workqueue(nvme_fc_wq);
+	return ret;
 }
 
 static void __exit nvme_fc_exit_module(void)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 21cac8523bd8..e4e4e60b1224 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
 }
 
 static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
@@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
 	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
 	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
 	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
-					sizeof(struct nvme_nvm_addr_format));
+					sizeof(struct nvm_addr_format));
 
 	ret = init_grps(nvm_id, nvme_nvm_id);
 out:
@@ -483,8 +483,8 @@ static void nvme_nvm_end_io(struct request *rq, int error)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
-	rqd->ppa_status = nvme_req(rq)->result.u64;
-	rqd->error = error;
+	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
+	rqd->error = nvme_req(rq)->status;
 	nvm_end_io(rqd);
 
 	kfree(nvme_req(rq)->cmd);
@@ -510,12 +510,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	}
 	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
 
-	rq->ioprio = bio_prio(bio);
-	if (bio_has_data(bio))
-		rq->nr_phys_segments = bio_phys_segments(q, bio);
-
-	rq->__data_len = bio->bi_iter.bi_size;
-	rq->bio = rq->biotail = bio;
+	if (bio) {
+		blk_init_request_from_bio(rq, bio);
+	} else {
+		rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+		rq->__data_len = 0;
+	}
 
 	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
 
@@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return 0;
 }
 
-static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-	struct request_queue *q = dev->q;
-	struct nvme_ns *ns = q->queuedata;
-	struct nvme_nvm_command c = {};
-
-	c.erase.opcode = NVM_OP_ERASE;
-	c.erase.nsid = cpu_to_le32(ns->ns_id);
-	c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
-	c.erase.length = cpu_to_le16(rqd->nr_ppas - 1);
-	c.erase.control = cpu_to_le16(rqd->flags);
-
-	return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
-}
-
 static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
 {
 	struct nvme_ns *ns = nvmdev->q->queuedata;
@@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.set_bb_tbl		= nvme_nvm_set_bb_tbl,
 
 	.submit_io		= nvme_nvm_submit_io,
-	.erase_block		= nvme_nvm_erase_block,
 
 	.create_dma_pool	= nvme_nvm_create_dma_pool,
 	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
@@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	__le64 *metadata = NULL;
 	dma_addr_t metadata_dma;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	int ret;
+	int ret = 0;
 
 	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
 			NVME_QID_ANY);
@@ -681,9 +665,12 @@ submit:
 
 	wait_for_completion_io(&wait);
 
-	ret = nvme_error_status(rq->errors);
+	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else if (nvme_req(rq)->status & 0x7ff)
+		ret = -EIO;
 	if (result)
-		*result = rq->errors & 0x7ff;
+		*result = nvme_req(rq)->status & 0x7ff;
 	if (status)
 		*status = le64_to_cpu(nvme_req(rq)->result.u64);
 
@@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
 	c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
 	/* cdw11-12 */
 	c.ph_rw.length = cpu_to_le16(vcmd.nppas);
-	c.ph_rw.control  = cpu_to_le32(vcmd.control);
+	c.ph_rw.control  = cpu_to_le16(vcmd.control);
 	c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
 	c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
 	c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
@@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
 	struct request_queue *q = ns->queue;
 	struct nvm_dev *dev;
 
+	_nvme_nvm_check_size();
+
 	dev = nvm_alloc_dev(node);
 	if (!dev)
 		return -ENOMEM;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2aa20e3e5675..29c708ca9621 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -21,16 +21,6 @@
 #include <linux/lightnvm.h>
 #include <linux/sed-opal.h>
 
-enum {
-	/*
-	 * Driver internal status code for commands that were cancelled due
-	 * to timeouts or controller shutdown.  The value is negative so
-	 * that it a) doesn't overlap with the unsigned hardware error codes,
-	 * and b) can easily be tested for.
-	 */
-	NVME_SC_CANCELLED		= -EINTR,
-};
-
 extern unsigned char nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
@@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout;
 #define NVME_DEFAULT_KATO	5
 #define NVME_KATO_GRACE		10
 
-extern unsigned int nvme_max_retries;
-
 enum {
 	NVME_NS_LBA		= 0,
 	NVME_NS_LIGHTNVM	= 1,
@@ -68,10 +56,10 @@ enum nvme_quirks {
 	NVME_QUIRK_IDENTIFY_CNS			= (1 << 1),
 
 	/*
-	 * The controller deterministically returns O's on reads to discarded
-	 * logical blocks.
+	 * The controller deterministically returns O's on reads to
+	 * logical blocks that deallocate was called on.
 	 */
-	NVME_QUIRK_DISCARD_ZEROES		= (1 << 2),
+	NVME_QUIRK_DEALLOCATE_ZEROES		= (1 << 2),
 
 	/*
 	 * The controller needs a delay before starts checking the device
@@ -83,6 +71,11 @@ enum nvme_quirks {
 	 * APST should not be used.
 	 */
 	NVME_QUIRK_NO_APST			= (1 << 4),
+
+	/*
+	 * The deepest sleep state should not be used.
+	 */
+	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),
 };
 
 /*
@@ -92,6 +85,13 @@ enum nvme_quirks {
 struct nvme_request {
 	struct nvme_command	*cmd;
 	union nvme_result	result;
+	u8			retries;
+	u8			flags;
+	u16			status;
+};
+
+enum {
+	NVME_REQ_CANCELLED		= (1 << 0),
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
@@ -249,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req)
 	}
 }
 
-static inline int nvme_error_status(u16 status)
+static inline void nvme_end_request(struct request *req, __le16 status,
+		union nvme_result result)
 {
-	switch (status & 0x7ff) {
-	case NVME_SC_SUCCESS:
-		return 0;
-	case NVME_SC_CAP_EXCEEDED:
-		return -ENOSPC;
-	default:
-		return -EIO;
-	}
-}
+	struct nvme_request *rq = nvme_req(req);
 
-static inline bool nvme_req_needs_retry(struct request *req, u16 status)
-{
-	return !(status & NVME_SC_DNR || blk_noretry_request(req)) &&
-		(jiffies - req->start_time) < req->timeout &&
-		req->retries < nvme_max_retries;
+	rq->status = le16_to_cpu(status) >> 1;
+	rq->result = result;
+	blk_mq_complete_request(req);
 }
 
+void nvme_complete_rq(struct request *req);
 void nvme_cancel_request(struct request *req, void *data, bool reserved);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		enum nvme_ctrl_state new_state);
@@ -302,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid);
-void nvme_requeue_req(struct request *req);
 int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 26a5fd05fe88..c8541c3dcd19 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -19,6 +19,7 @@
 #include <linux/blk-mq-pci.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/dmi.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/genhd.h>
@@ -103,8 +104,22 @@ struct nvme_dev {
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
+	u32 *dbbuf_dbs;
+	dma_addr_t dbbuf_dbs_dma_addr;
+	u32 *dbbuf_eis;
+	dma_addr_t dbbuf_eis_dma_addr;
 };
 
+static inline unsigned int sq_idx(unsigned int qid, u32 stride)
+{
+	return qid * 2 * stride;
+}
+
+static inline unsigned int cq_idx(unsigned int qid, u32 stride)
+{
+	return (qid * 2 + 1) * stride;
+}
+
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
 {
 	return container_of(ctrl, struct nvme_dev, ctrl);
@@ -133,6 +148,10 @@ struct nvme_queue {
 	u16 qid;
 	u8 cq_phase;
 	u8 cqe_seen;
+	u32 *dbbuf_sq_db;
+	u32 *dbbuf_cq_db;
+	u32 *dbbuf_sq_ei;
+	u32 *dbbuf_cq_ei;
 };
 
 /*
@@ -171,6 +190,112 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+}
+
+static inline unsigned int nvme_dbbuf_size(u32 stride)
+{
+	return ((num_possible_cpus() + 1) * 8 * stride);
+}
+
+static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
+{
+	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+	if (dev->dbbuf_dbs)
+		return 0;
+
+	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
+					    &dev->dbbuf_dbs_dma_addr,
+					    GFP_KERNEL);
+	if (!dev->dbbuf_dbs)
+		return -ENOMEM;
+	dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
+					    &dev->dbbuf_eis_dma_addr,
+					    GFP_KERNEL);
+	if (!dev->dbbuf_eis) {
+		dma_free_coherent(dev->dev, mem_size,
+				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+		dev->dbbuf_dbs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
+{
+	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+
+	if (dev->dbbuf_dbs) {
+		dma_free_coherent(dev->dev, mem_size,
+				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
+		dev->dbbuf_dbs = NULL;
+	}
+	if (dev->dbbuf_eis) {
+		dma_free_coherent(dev->dev, mem_size,
+				  dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
+		dev->dbbuf_eis = NULL;
+	}
+}
+
+static void nvme_dbbuf_init(struct nvme_dev *dev,
+			    struct nvme_queue *nvmeq, int qid)
+{
+	if (!dev->dbbuf_dbs || !qid)
+		return;
+
+	nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
+	nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
+	nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
+	nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
+}
+
+static void nvme_dbbuf_set(struct nvme_dev *dev)
+{
+	struct nvme_command c;
+
+	if (!dev->dbbuf_dbs)
+		return;
+
+	memset(&c, 0, sizeof(c));
+	c.dbbuf.opcode = nvme_admin_dbbuf;
+	c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
+	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
+
+	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
+		dev_warn(dev->dev, "unable to set dbbuf\n");
+		/* Free memory and continue on */
+		nvme_dbbuf_dma_free(dev);
+	}
+}
+
+static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
+{
+	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
+}
+
+/* Update dbbuf and return true if an MMIO is required */
+static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
+					      volatile u32 *dbbuf_ei)
+{
+	if (dbbuf_db) {
+		u16 old_value;
+
+		/*
+		 * Ensure that the queue is written before updating
+		 * the doorbell in memory
+		 */
+		wmb();
+
+		old_value = *dbbuf_db;
+		*dbbuf_db = value;
+
+		if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
+			return false;
+	}
+
+	return true;
 }
 
 /*
@@ -297,7 +422,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
 
 	if (++tail == nvmeq->q_depth)
 		tail = 0;
-	writel(tail, nvmeq->q_db);
+	if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
+					      nvmeq->dbbuf_sq_ei))
+		writel(tail, nvmeq->q_db);
 	nvmeq->sq_tail = tail;
 }
 
@@ -326,10 +453,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->nents = 0;
 	iod->length = size;
 
-	if (!(rq->rq_flags & RQF_DONTPREP)) {
-		rq->retries = 0;
-		rq->rq_flags |= RQF_DONTPREP;
-	}
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
@@ -628,34 +751,12 @@ out_free_cmd:
 	return ret;
 }
 
-static void nvme_complete_rq(struct request *req)
+static void nvme_pci_complete_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_dev *dev = iod->nvmeq->dev;
-	int error = 0;
-
-	nvme_unmap_data(dev, req);
 
-	if (unlikely(req->errors)) {
-		if (nvme_req_needs_retry(req, req->errors)) {
-			req->retries++;
-			nvme_requeue_req(req);
-			return;
-		}
-
-		if (blk_rq_is_passthrough(req))
-			error = req->errors;
-		else
-			error = nvme_error_status(req->errors);
-	}
-
-	if (unlikely(iod->aborted)) {
-		dev_warn(dev->ctrl.device,
-			"completing aborted command with status: %04x\n",
-			req->errors);
-	}
-
-	blk_mq_end_request(req, error);
+	nvme_unmap_data(iod->nvmeq->dev, req);
+	nvme_complete_rq(req);
 }
 
 /* We read the CQE phase first to check if the rest of the entry is valid */
@@ -705,15 +806,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		}
 
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-		nvme_req(req)->result = cqe.result;
-		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
+		nvme_end_request(req, cqe.status, cqe.result);
 	}
 
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return;
 
 	if (likely(nvmeq->cq_vector >= 0))
-		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+		if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+						      nvmeq->dbbuf_cq_ei))
+			writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
@@ -745,10 +847,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 	return IRQ_NONE;
 }
 
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	struct nvme_queue *nvmeq = hctx->driver_data;
-
 	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
@@ -761,6 +861,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return 0;
 }
 
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+{
+	struct nvme_queue *nvmeq = hctx->driver_data;
+
+	return __nvme_poll(nvmeq, tag);
+}
+
 static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
@@ -812,7 +919,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 						struct nvme_queue *nvmeq)
 {
 	struct nvme_command c;
-	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+	int flags = NVME_QUEUE_PHYS_CONTIG;
 
 	/*
 	 * Note: we (ab)use the fact the the prp fields survive if no data
@@ -843,9 +950,9 @@ static void abort_endio(struct request *req, int error)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
-	u16 status = req->errors;
 
-	dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
+	dev_warn(nvmeq->dev->ctrl.device,
+		 "Abort status: 0x%x", nvme_req(req)->status);
 	atomic_inc(&nvmeq->dev->ctrl.abort_limit);
 	blk_mq_free_request(req);
 }
@@ -859,6 +966,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_command cmd;
 
 	/*
+	 * Did we miss an interrupt?
+	 */
+	if (__nvme_poll(nvmeq, req->tag)) {
+		dev_warn(dev->ctrl.device,
+			 "I/O %d QID %d timeout, completion polled\n",
+			 req->tag, nvmeq->qid);
+		return BLK_EH_HANDLED;
+	}
+
+	/*
 	 * Shutdown immediately if controller times out while starting. The
 	 * reset work will see the pci device disabled when it gets the forced
 	 * cancellation error. All outstanding requests are completed on
@@ -869,7 +986,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
-		req->errors = NVME_SC_CANCELLED;
+		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_HANDLED;
 	}
 
@@ -889,7 +1006,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		 * Mark the request as handled, since the inline shutdown
 		 * forces all outstanding requests to complete.
 		 */
-		req->errors = NVME_SC_CANCELLED;
+		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_HANDLED;
 	}
 
@@ -1097,6 +1214,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+	nvme_dbbuf_init(dev, nvmeq, qid);
 	dev->online_queues++;
 	spin_unlock_irq(&nvmeq->q_lock);
 }
@@ -1129,18 +1247,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	return result;
 }
 
-static struct blk_mq_ops nvme_mq_admin_ops = {
+static const struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_queue_rq,
-	.complete	= nvme_complete_rq,
+	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx      = nvme_admin_exit_hctx,
 	.init_request	= nvme_admin_init_request,
 	.timeout	= nvme_timeout,
 };
 
-static struct blk_mq_ops nvme_mq_ops = {
+static const struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
-	.complete	= nvme_complete_rq,
+	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_init_hctx,
 	.init_request	= nvme_init_request,
 	.map_queues	= nvme_pci_map_queues,
@@ -1569,6 +1687,8 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		if (blk_mq_alloc_tag_set(&dev->tagset))
 			return 0;
 		dev->ctrl.tagset = &dev->tagset;
+
+		nvme_dbbuf_set(dev);
 	} else {
 		blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
 
@@ -1755,6 +1875,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 
+	nvme_dbbuf_dma_free(dev);
 	put_device(dev->dev);
 	if (dev->tagset.tags)
 		blk_mq_free_tag_set(&dev->tagset);
@@ -1822,6 +1943,13 @@ static void nvme_reset_work(struct work_struct *work)
 		dev->ctrl.opal_dev = NULL;
 	}
 
+	if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
+		result = nvme_dbbuf_dma_alloc(dev);
+		if (result)
+			dev_warn(dev->dev,
+				 "unable to allocate dma for dbbuf\n");
+	}
+
 	result = nvme_setup_io_queues(dev);
 	if (result)
 		goto out;
@@ -1943,10 +2071,31 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	return -ENODEV;
 }
 
+static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
+{
+	if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
+		/*
+		 * Several Samsung devices seem to drop off the PCIe bus
+		 * randomly when APST is on and uses the deepest sleep state.
+		 * This has been observed on a Samsung "SM951 NVMe SAMSUNG
+		 * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
+		 * 950 PRO 256GB", but it seems to be restricted to two Dell
+		 * laptops.
+		 */
+		if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
+		    (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
+		     dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
+			return NVME_QUIRK_NO_DEEPEST_PS;
+	}
+
+	return 0;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
+	unsigned long quirks = id->driver_data;
 
 	node = dev_to_node(&pdev->dev);
 	if (node == NUMA_NO_NODE)
@@ -1978,8 +2127,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto put_pci;
 
+	quirks |= check_dell_samsung_bug(pdev);
+
 	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
-			id->driver_data);
+			quirks);
 	if (result)
 		goto release_pools;
 
@@ -2135,13 +2286,13 @@ static const struct pci_error_handlers nvme_err_handler = {
 static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0953),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
-				NVME_QUIRK_DISCARD_ZEROES, },
+				NVME_QUIRK_DEALLOCATE_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0x0a53),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
-				NVME_QUIRK_DISCARD_ZEROES, },
+				NVME_QUIRK_DEALLOCATE_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0x0a54),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
-				NVME_QUIRK_DISCARD_ZEROES, },
+				NVME_QUIRK_DEALLOCATE_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 16f84eb0b95e..29cf88ac3f61 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -34,7 +34,7 @@
 #include "fabrics.h"
 
 
-#define NVME_RDMA_CONNECT_TIMEOUT_MS	1000		/* 1 second */
+#define NVME_RDMA_CONNECT_TIMEOUT_MS	3000		/* 3 second */
 
 #define NVME_RDMA_MAX_SEGMENT_SIZE	0xffffff	/* 24-bit SGL field */
 
@@ -118,7 +118,6 @@ struct nvme_rdma_ctrl {
 
 	struct nvme_rdma_qe	async_event_sqe;
 
-	int			reconnect_delay;
 	struct delayed_work	reconnect_work;
 
 	struct list_head	list;
@@ -129,14 +128,8 @@ struct nvme_rdma_ctrl {
 	u64			cap;
 	u32			max_fr_pages;
 
-	union {
-		struct sockaddr addr;
-		struct sockaddr_in addr_in;
-	};
-	union {
-		struct sockaddr src_addr;
-		struct sockaddr_in src_addr_in;
-	};
+	struct sockaddr_storage addr;
+	struct sockaddr_storage src_addr;
 
 	struct nvme_ctrl	ctrl;
 };
@@ -569,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		return PTR_ERR(queue->cm_id);
 	}
 
-	queue->cm_error = -ETIMEDOUT;
 	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
-		src_addr = &ctrl->src_addr;
+		src_addr = (struct sockaddr *)&ctrl->src_addr;
 
-	ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
+	queue->cm_error = -ETIMEDOUT;
+	ret = rdma_resolve_addr(queue->cm_id, src_addr,
+			(struct sockaddr *)&ctrl->addr,
 			NVME_RDMA_CONNECT_TIMEOUT_MS);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
@@ -712,6 +706,26 @@ free_ctrl:
 	kfree(ctrl);
 }
 
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+{
+	/* If we are resetting/deleting then do nothing */
+	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+			ctrl->ctrl.state == NVME_CTRL_LIVE);
+		return;
+	}
+
+	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
+			ctrl->ctrl.opts->reconnect_delay);
+		queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+				ctrl->ctrl.opts->reconnect_delay * HZ);
+	} else {
+		dev_info(ctrl->ctrl.device, "Removing controller...\n");
+		queue_work(nvme_rdma_wq, &ctrl->delete_work);
+	}
+}
+
 static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
@@ -719,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
+	++ctrl->ctrl.opts->nr_reconnects;
+
 	if (ctrl->queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
 
@@ -763,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
+	ctrl->ctrl.opts->nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
@@ -777,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 stop_admin_q:
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
-	/* Make sure we are not resetting/deleting */
-	if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
-		dev_info(ctrl->ctrl.device,
-			"Failed reconnect attempt, requeueing...\n");
-		queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-					ctrl->reconnect_delay * HZ);
-	}
+	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
+			ctrl->ctrl.opts->nr_reconnects);
+	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -810,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 
-	dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n",
-		ctrl->reconnect_delay);
-
-	queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
-				ctrl->reconnect_delay * HZ);
+	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -1169,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	    wc->ex.invalidate_rkey == req->mr->rkey)
 		req->mr->need_inval = false;
 
-	req->req.result = cqe->result;
-	blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
+	nvme_end_request(rq, cqe->status, cqe->result);
 	return ret;
 }
 
@@ -1407,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 	nvme_rdma_error_recovery(req->queue->ctrl);
 
 	/* fail with DNR on cmd timeout */
-	rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
 
 	return BLK_EH_HANDLED;
 }
@@ -1509,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 static void nvme_rdma_complete_rq(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_queue *queue = req->queue;
-	int error = 0;
-
-	nvme_rdma_unmap_data(queue, rq);
 
-	if (unlikely(rq->errors)) {
-		if (nvme_req_needs_retry(rq, rq->errors)) {
-			nvme_requeue_req(rq);
-			return;
-		}
-
-		if (blk_rq_is_passthrough(rq))
-			error = rq->errors;
-		else
-			error = nvme_error_status(rq->errors);
-	}
-
-	blk_mq_end_request(rq, error);
+	nvme_rdma_unmap_data(req->queue, rq);
+	nvme_complete_rq(rq);
 }
 
-static struct blk_mq_ops nvme_rdma_mq_ops = {
+static const struct blk_mq_ops nvme_rdma_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
 	.init_request	= nvme_rdma_init_request,
@@ -1540,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = {
 	.timeout	= nvme_rdma_timeout,
 };
 
-static struct blk_mq_ops nvme_rdma_admin_mq_ops = {
+static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
 	.init_request	= nvme_rdma_init_admin_request,
@@ -1857,27 +1850,13 @@ out_free_io_queues:
 	return ret;
 }
 
-static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p)
-{
-	u8 *addr = (u8 *)&in_addr->sin_addr.s_addr;
-	size_t buflen = strlen(p);
-
-	/* XXX: handle IPv6 addresses */
-
-	if (buflen > INET_ADDRSTRLEN)
-		return -EINVAL;
-	if (in4_pton(p, buflen, addr, '\0', NULL) == 0)
-		return -EINVAL;
-	in_addr->sin_family = AF_INET;
-	return 0;
-}
-
 static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_rdma_ctrl *ctrl;
 	int ret;
 	bool changed;
+	char *port;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl)
@@ -1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	ctrl->ctrl.opts = opts;
 	INIT_LIST_HEAD(&ctrl->list);
 
-	ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr);
+	if (opts->mask & NVMF_OPT_TRSVCID)
+		port = opts->trsvcid;
+	else
+		port = __stringify(NVME_RDMA_IP_PORT);
+
+	ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+			opts->traddr, port, &ctrl->addr);
 	if (ret) {
-		pr_err("malformed IP address passed: %s\n", opts->traddr);
+		pr_err("malformed address passed: %s:%s\n", opts->traddr, port);
 		goto out_free_ctrl;
 	}
 
 	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
-		ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
-				opts->host_traddr);
+		ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
+			opts->host_traddr, NULL, &ctrl->src_addr);
 		if (ret) {
-			pr_err("malformed src IP address passed: %s\n",
+			pr_err("malformed src address passed: %s\n",
 			       opts->host_traddr);
 			goto out_free_ctrl;
 		}
 	}
 
-	if (opts->mask & NVMF_OPT_TRSVCID) {
-		u16 port;
-
-		ret = kstrtou16(opts->trsvcid, 0, &port);
-		if (ret)
-			goto out_free_ctrl;
-
-		ctrl->addr_in.sin_port = cpu_to_be16(port);
-	} else {
-		ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT);
-	}
-
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
 				0 /* no quirks, we're perfect! */);
 	if (ret)
 		goto out_free_ctrl;
 
-	ctrl->reconnect_delay = opts->reconnect_delay;
 	INIT_DELAYED_WORK(&ctrl->reconnect_work,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
+	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
 		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
 
 	kref_get(&ctrl->ctrl.kref);
@@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
 	.name		= "rdma",
 	.required_opts	= NVMF_OPT_TRADDR,
 	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
-			  NVMF_OPT_HOST_TRADDR,
+			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
 
@@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void)
 		return -ENOMEM;
 
 	ret = ib_register_client(&nvme_rdma_ib_client);
-	if (ret) {
-		destroy_workqueue(nvme_rdma_wq);
-		return ret;
-	}
+	if (ret)
+		goto err_destroy_wq;
+
+	ret = nvmf_register_transport(&nvme_rdma_transport);
+	if (ret)
+		goto err_unreg_client;
+
+	return 0;
 
-	return nvmf_register_transport(&nvme_rdma_transport);
+err_unreg_client:
+	ib_unregister_client(&nvme_rdma_ib_client);
+err_destroy_wq:
+	destroy_workqueue(nvme_rdma_wq);
+	return ret;
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index f49ae2758bb7..1f7671e631dd 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 	u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
 	u16 control;
-	u32 max_blocks = queue_max_hw_sectors(ns->queue);
+	u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
 
 	num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
 
@@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	return res;
 }
 
-static int nvme_trans_security_protocol(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr,
-					u8 *cmd)
-{
-	return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-}
-
 static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr)
 {
@@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 	case REQUEST_SENSE:
 		retcode = nvme_trans_request_sense(ns, hdr, cmd);
 		break;
-	case SECURITY_PROTOCOL_IN:
-	case SECURITY_PROTOCOL_OUT:
-		retcode = nvme_trans_security_protocol(ns, hdr, cmd);
-		break;
 	case SYNCHRONIZE_CACHE:
 		retcode = nvme_trans_synchronize_cache(ns, hdr);
 		break;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 76450b0c55f1..ff1f97006322 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 	}
 
 	switch (req->cmd->get_log_page.lid) {
-	case 0x01:
+	case NVME_LOG_ERROR:
 		/*
 		 * We currently never set the More bit in the status field,
 		 * so all error log entries are invalid and can be zeroed out.
@@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 		 * mandatory log page.
 		 */
 		break;
-	case 0x02:
+	case NVME_LOG_SMART:
 		/*
 		 * XXX: fill out actual smart log
 		 *
@@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 			goto err;
 		}
 		break;
-	case 0x03:
+	case NVME_LOG_FW_SLOT:
 		/*
 		 * We only support a single firmware slot which always is
 		 * active, so we can zero out the whole firmware slot log and
@@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req)
 	nvmet_req_complete(req, 0);
 }
 
-int nvmet_parse_admin_cmd(struct nvmet_req *req)
+u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
+	u16 ret;
 
 	req->ns = NULL;
 
-	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
-		pr_err("nvmet: got admin cmd %d while CC.EN == 0\n",
-				cmd->common.opcode);
-		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-	}
-	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-		pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n",
-				cmd->common.opcode);
-		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-	}
+	ret = nvmet_check_ctrl_status(req, cmd);
+	if (unlikely(ret))
+		return ret;
 
 	switch (cmd->common.opcode) {
 	case nvme_admin_get_log_page:
 		req->data_len = nvmet_get_log_page_len(cmd);
 
 		switch (cmd->get_log_page.lid) {
-		case 0x01:
-		case 0x02:
-		case 0x03:
+		case NVME_LOG_ERROR:
+		case NVME_LOG_SMART:
+		case NVME_LOG_FW_SLOT:
 			req->execute = nvmet_execute_get_log_page;
 			return 0;
 		}
@@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
 		return 0;
 	}
 
-	pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+	pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+	       req->sq->qid);
 	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 }
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 798653b329b2..cf90713043da 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
 			NULL);
 	if (IS_ERR(ns->bdev)) {
-		pr_err("nvmet: failed to open block device %s: (%ld)\n",
-			ns->device_path, PTR_ERR(ns->bdev));
+		pr_err("failed to open block device %s: (%ld)\n",
+		       ns->device_path, PTR_ERR(ns->bdev));
 		ret = PTR_ERR(ns->bdev);
 		ns->bdev = NULL;
 		goto out_unlock;
@@ -661,6 +661,23 @@ out:
 	return status;
 }
 
+u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
+{
+	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
+		pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
+		       cmd->common.opcode, req->sq->qid);
+		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+	}
+
+	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
+		pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
+		       cmd->common.opcode, req->sq->qid);
+		req->ns = NULL;
+		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+	}
+	return 0;
+}
+
 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
 		const char *hostnqn)
 {
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index af8aabf05335..1aaf597e81fc 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -159,15 +159,15 @@ out:
 	nvmet_req_complete(req, status);
 }
 
-int nvmet_parse_discovery_cmd(struct nvmet_req *req)
+u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
 
 	req->ns = NULL;
 
 	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-		pr_err("nvmet: got cmd %d while not ready\n",
-				cmd->common.opcode);
+		pr_err("got cmd %d while not ready\n",
+		       cmd->common.opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 
@@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
 			req->execute = nvmet_execute_get_disc_log_page;
 			return 0;
 		default:
-			pr_err("nvmet: unsupported get_log_page lid %d\n",
-				cmd->get_log_page.lid);
+			pr_err("unsupported get_log_page lid %d\n",
+			       cmd->get_log_page.lid);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	case nvme_admin_identify:
@@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
 				nvmet_execute_identify_disc_ctrl;
 			return 0;
 		default:
-			pr_err("nvmet: unsupported identify cns %d\n",
-				cmd->identify.cns);
+			pr_err("unsupported identify cns %d\n",
+			       cmd->identify.cns);
 			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	default:
-		pr_err("nvmet: unsupported cmd %d\n",
-				cmd->common.opcode);
+		pr_err("unsupported cmd %d\n", cmd->common.opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 
-	pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+	pr_err("unhandled cmd %d\n", cmd->common.opcode);
 	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 }
 
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 8bd022af3df6..3cc17269504b 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
-int nvmet_parse_fabrics_cmd(struct nvmet_req *req)
+u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
 
@@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	struct nvmet_ctrl *ctrl = NULL;
 	u16 status = 0;
 
-	d = kmap(sg_page(req->sg)) + req->sg->offset;
+	d = kmalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		status = NVME_SC_INTERNAL;
+		goto complete;
+	}
+
+	status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+	if (status)
+		goto out;
 
 	/* zero out initial completion result, assign values as needed */
 	req->rsp->result.u32 = 0;
@@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	}
 
 	status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
-			le32_to_cpu(c->kato), &ctrl);
+				  le32_to_cpu(c->kato), &ctrl);
 	if (status)
 		goto out;
 
@@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 out:
-	kunmap(sg_page(req->sg));
+	kfree(d);
+complete:
 	nvmet_req_complete(req, status);
 }
 
@@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	u16 qid = le16_to_cpu(c->qid);
 	u16 status = 0;
 
-	d = kmap(sg_page(req->sg)) + req->sg->offset;
+	d = kmalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		status = NVME_SC_INTERNAL;
+		goto complete;
+	}
+
+	status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+	if (status)
+		goto out;
 
 	/* zero out initial completion result, assign values as needed */
 	req->rsp->result.u32 = 0;
@@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	}
 
 	status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
-			le16_to_cpu(d->cntlid),
-			req, &ctrl);
+				     le16_to_cpu(d->cntlid),
+				     req, &ctrl);
 	if (status)
 		goto out;
 
@@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
 
 out:
-	kunmap(sg_page(req->sg));
+	kfree(d);
+complete:
 	nvmet_req_complete(req, status);
 	return;
 
@@ -214,7 +232,7 @@ out_ctrl_put:
 	goto out;
 }
 
-int nvmet_parse_connect_cmd(struct nvmet_req *req)
+u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
 
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 8f483ee7868c..62eba29c85fb 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod {
 	enum nvmet_fcp_datadir		io_dir;
 	bool				active;
 	bool				abort;
+	bool				aborted;
+	bool				writedataactive;
 	spinlock_t			flock;
 
 	struct nvmet_req		req;
 	struct work_struct		work;
+	struct work_struct		done_work;
 
 	struct nvmet_fc_tgtport		*tgtport;
 	struct nvmet_fc_tgt_queue	*queue;
@@ -116,7 +119,7 @@ struct nvmet_fc_tgt_queue {
 	u16				qid;
 	u16				sqsize;
 	u16				ersp_ratio;
-	u16				sqhd;
+	__le16				sqhd;
 	int				cpu;
 	atomic_t			connected;
 	atomic_t			sqtail;
@@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt);
 
 static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
 static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
+static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
 static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
 static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
 static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
 
 	for (i = 0; i < queue->sqsize; fod++, i++) {
 		INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
+		INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
 		fod->tgtport = tgtport;
 		fod->queue = queue;
 		fod->active = false;
+		fod->abort = false;
+		fod->aborted = false;
+		fod->fcpreq = NULL;
 		list_add_tail(&fod->fcp_list, &queue->fod_list);
 		spin_lock_init(&fod->flock);
 
@@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 	if (fod) {
 		list_del(&fod->fcp_list);
 		fod->active = true;
-		fod->abort = false;
 		/*
 		 * no queue reference is taken, as it was taken by the
 		 * queue lookup just prior to the allocation. The iod
@@ -479,17 +486,30 @@ static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
+	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
 	unsigned long flags;
 
+	fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
+				sizeof(fod->rspiubuf), DMA_TO_DEVICE);
+
+	fcpreq->nvmet_fc_private = NULL;
+
 	spin_lock_irqsave(&queue->qlock, flags);
 	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
+	fod->abort = false;
+	fod->aborted = false;
+	fod->writedataactive = false;
+	fod->fcpreq = NULL;
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
 	/*
 	 * release the reference taken at queue lookup and fod allocation
 	 */
 	nvmet_fc_tgt_q_put(queue);
+
+	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
 }
 
 static int
@@ -616,32 +636,12 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue)
 
 
 static void
-nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport,
-				struct nvmefc_tgt_fcp_req *fcpreq)
-{
-	int ret;
-
-	fcpreq->op = NVMET_FCOP_ABORT;
-	fcpreq->offset = 0;
-	fcpreq->timeout = 0;
-	fcpreq->transfer_length = 0;
-	fcpreq->transferred_length = 0;
-	fcpreq->fcp_error = 0;
-	fcpreq->sg_cnt = 0;
-
-	ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq);
-	if (ret)
-		/* should never reach here !! */
-		WARN_ON(1);
-}
-
-
-static void
 nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
+	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
 	unsigned long flags;
-	int i;
+	int i, writedataactive;
 	bool disconnect;
 
 	disconnect = atomic_xchg(&queue->connected, 0);
@@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 		if (fod->active) {
 			spin_lock(&fod->flock);
 			fod->abort = true;
+			writedataactive = fod->writedataactive;
 			spin_unlock(&fod->flock);
+			/*
+			 * only call lldd abort routine if waiting for
+			 * writedata. other outstanding ops should finish
+			 * on their own.
+			 */
+			if (writedataactive) {
+				spin_lock(&fod->flock);
+				fod->aborted = true;
+				spin_unlock(&fod->flock);
+				tgtport->ops->fcp_abort(
+					&tgtport->fc_target_port, fod->fcpreq);
+			}
 		}
 	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
@@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
 	int ret, idx;
 
 	if (!template->xmt_ls_rsp || !template->fcp_op ||
-	    !template->targetport_delete ||
+	    !template->fcp_abort ||
+	    !template->fcp_req_release || !template->targetport_delete ||
 	    !template->max_hw_queues || !template->max_sgl_segments ||
 	    !template->max_dif_sgl_segments || !template->dma_boundary) {
 		ret = -EINVAL;
@@ -1044,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport);
 
 
 static void
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd)
+nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
 {
 	struct fcnvme_ls_acc_hdr *acc = buf;
 
@@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 			validation_errors[ret]);
 		iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
 				NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
-				ELS_RJT_LOGIC,
-				ELS_EXPL_NONE, 0);
+				FCNVME_RJT_RC_LOGIC,
+				FCNVME_RJT_EXP_NONE, 0);
 		return;
 	}
 
@@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
 		iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
 				NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
 				(ret == VERR_NO_ASSOC) ?
-						ELS_RJT_PROT : ELS_RJT_LOGIC,
-				ELS_EXPL_NONE, 0);
+					FCNVME_RJT_RC_INV_ASSOC :
+					FCNVME_RJT_RC_LOGIC,
+				FCNVME_RJT_EXP_NONE, 0);
 		return;
 	}
 
@@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 			validation_errors[ret]);
 		iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
 				NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
-				(ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC,
-				ELS_EXPL_NONE, 0);
+				(ret == VERR_NO_ASSOC) ?
+					FCNVME_RJT_RC_INV_ASSOC :
+					(ret == VERR_NO_CONN) ?
+						FCNVME_RJT_RC_INV_CONN :
+						FCNVME_RJT_RC_LOGIC,
+				FCNVME_RJT_EXP_NONE, 0);
 		return;
 	}
 
@@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
 	default:
 		iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf,
 				NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
-				ELS_RJT_INVAL, ELS_EXPL_NONE, 0);
+				FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
 	}
 
 	nvmet_fc_xmt_ls_rsp(tgtport, iod);
@@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
 	for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count)
 		__free_page(sg_page(sg));
 	kfree(fod->data_sg);
+	fod->data_sg = NULL;
+	fod->data_sg_cnt = 0;
 }
 
 
@@ -1679,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
 	    xfr_length != fod->total_length ||
 	    (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
 	    (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
-	    queue_90percent_full(fod->queue, cqe->sq_head))
+	    queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head)))
 		send_ersp = true;
 
 	/* re-set the fields */
@@ -1704,6 +1725,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
 static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq);
 
 static void
+nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport,
+				struct nvmet_fc_fcp_iod *fod)
+{
+	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+
+	/* data no longer needed */
+	nvmet_fc_free_tgt_pgs(fod);
+
+	/*
+	 * if an ABTS was received or we issued the fcp_abort early
+	 * don't call abort routine again.
+	 */
+	/* no need to take lock - lock was taken earlier to get here */
+	if (!fod->aborted)
+		tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq);
+
+	nvmet_fc_free_fcp_iod(fod->queue, fod);
+}
+
+static void
 nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
 				struct nvmet_fc_fcp_iod *fod)
 {
@@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
 
 	ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq);
 	if (ret)
-		nvmet_fc_abort_op(tgtport, fod->fcpreq);
+		nvmet_fc_abort_op(tgtport, fod);
 }
 
 static void
@@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
 {
 	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct scatterlist *sg, *datasg;
+	unsigned long flags;
 	u32 tlen, sg_off;
 	int ret;
 
@@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
 		 */
 		fod->abort = true;
 
-		if (op == NVMET_FCOP_WRITEDATA)
+		if (op == NVMET_FCOP_WRITEDATA) {
+			spin_lock_irqsave(&fod->flock, flags);
+			fod->writedataactive = false;
+			spin_unlock_irqrestore(&fod->flock, flags);
 			nvmet_req_complete(&fod->req,
 					NVME_SC_FC_TRANSPORT_ERROR);
-		else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
+		} else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
 			fcpreq->fcp_error = ret;
 			fcpreq->transferred_length = 0;
 			nvmet_fc_xmt_fcp_op_done(fod->fcpreq);
@@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
 	}
 }
 
+static inline bool
+__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
+{
+	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
+	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+
+	/* if in the middle of an io and we need to tear down */
+	if (abort) {
+		if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
+			nvmet_req_complete(&fod->req,
+					NVME_SC_FC_TRANSPORT_ERROR);
+			return true;
+		}
+
+		nvmet_fc_abort_op(tgtport, fod);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * actual done handler for FCP operations when completed by the lldd
+ */
 static void
-nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
+nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
 {
-	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
 	unsigned long flags;
 	bool abort;
 
 	spin_lock_irqsave(&fod->flock, flags);
 	abort = fod->abort;
+	fod->writedataactive = false;
 	spin_unlock_irqrestore(&fod->flock, flags);
 
-	/* if in the middle of an io and we need to tear down */
-	if (abort && fcpreq->op != NVMET_FCOP_ABORT) {
-		/* data no longer needed */
-		nvmet_fc_free_tgt_pgs(fod);
-
-		nvmet_req_complete(&fod->req, fcpreq->fcp_error);
-		return;
-	}
-
 	switch (fcpreq->op) {
 
 	case NVMET_FCOP_WRITEDATA:
+		if (__nvmet_fc_fod_op_abort(fod, abort))
+			return;
 		if (fcpreq->fcp_error ||
 		    fcpreq->transferred_length != fcpreq->transfer_length) {
+			spin_lock(&fod->flock);
+			fod->abort = true;
+			spin_unlock(&fod->flock);
+
 			nvmet_req_complete(&fod->req,
 					NVME_SC_FC_TRANSPORT_ERROR);
 			return;
@@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 
 		fod->offset += fcpreq->transferred_length;
 		if (fod->offset != fod->total_length) {
+			spin_lock_irqsave(&fod->flock, flags);
+			fod->writedataactive = true;
+			spin_unlock_irqrestore(&fod->flock, flags);
+
 			/* transfer the next chunk */
 			nvmet_fc_transfer_fcp_data(tgtport, fod,
 						NVMET_FCOP_WRITEDATA);
@@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 
 	case NVMET_FCOP_READDATA:
 	case NVMET_FCOP_READDATA_RSP:
+		if (__nvmet_fc_fod_op_abort(fod, abort))
+			return;
 		if (fcpreq->fcp_error ||
 		    fcpreq->transferred_length != fcpreq->transfer_length) {
-			/* data no longer needed */
-			nvmet_fc_free_tgt_pgs(fod);
-
-			nvmet_fc_abort_op(tgtport, fod->fcpreq);
+			nvmet_fc_abort_op(tgtport, fod);
 			return;
 		}
 
@@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 		if (fcpreq->op == NVMET_FCOP_READDATA_RSP) {
 			/* data no longer needed */
 			nvmet_fc_free_tgt_pgs(fod);
-			fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
-					sizeof(fod->rspiubuf), DMA_TO_DEVICE);
 			nvmet_fc_free_fcp_iod(fod->queue, fod);
 			return;
 		}
@@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
 		break;
 
 	case NVMET_FCOP_RSP:
-	case NVMET_FCOP_ABORT:
-		fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
-				sizeof(fod->rspiubuf), DMA_TO_DEVICE);
+		if (__nvmet_fc_fod_op_abort(fod, abort))
+			return;
 		nvmet_fc_free_fcp_iod(fod->queue, fod);
 		break;
 
 	default:
-		nvmet_fc_free_tgt_pgs(fod);
-		nvmet_fc_abort_op(tgtport, fod->fcpreq);
 		break;
 	}
 }
 
+static void
+nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work)
+{
+	struct nvmet_fc_fcp_iod *fod =
+		container_of(work, struct nvmet_fc_fcp_iod, done_work);
+
+	nvmet_fc_fod_op_done(fod);
+}
+
+static void
+nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+	struct nvmet_fc_tgt_queue *queue = fod->queue;
+
+	if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR)
+		/* context switch so completion is not in ISR context */
+		queue_work_on(queue->cpu, queue->work_q, &fod->done_work);
+	else
+		nvmet_fc_fod_op_done(fod);
+}
+
 /*
  * actual completion handler after execution by the nvmet layer
  */
@@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport,
 		fod->queue->sqhd = cqe->sq_head;
 
 	if (abort) {
-		/* data no longer needed */
-		nvmet_fc_free_tgt_pgs(fod);
-
-		nvmet_fc_abort_op(tgtport, fod->fcpreq);
+		nvmet_fc_abort_op(tgtport, fod);
 		return;
 	}
 
@@ -1971,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req)
 /*
  * Actual processing routine for received FC-NVME LS Requests from the LLD
  */
-void
+static void
 nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 			struct nvmet_fc_fcp_iod *fod)
 {
@@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 				&fod->queue->nvme_cq,
 				&fod->queue->nvme_sq,
 				&nvmet_fc_tgt_fcp_ops);
-	if (!ret) {	/* bad SQE content */
-		nvmet_fc_abort_op(tgtport, fod->fcpreq);
+	if (!ret) {	/* bad SQE content or invalid ctrl state */
+		nvmet_fc_abort_op(tgtport, fod);
 		return;
 	}
 
@@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	return;
 
 transport_error:
-	nvmet_fc_abort_op(tgtport, fod->fcpreq);
+	nvmet_fc_abort_op(tgtport, fod);
 }
 
 /*
@@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
  * If this routine returns error, the lldd should abort the exchange.
  *
  * @target_port: pointer to the (registered) target port the FCP CMD IU
- *              was receive on.
+ *              was received on.
  * @fcpreq:     pointer to a fcpreq request structure to be used to reference
  *              the exchange corresponding to the FCP Exchange.
  * @cmdiubuf:   pointer to the buffer containing the FCP CMD IU
@@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 			(be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
 		return -EIO;
 
-
 	queue = nvmet_fc_find_target_queue(tgtport,
 				be64_to_cpu(cmdiu->connection_id));
 	if (!queue)
@@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
 	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
 
-	queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+		queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	else
+		nvmet_fc_handle_fcp_rqst(tgtport, fod);
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
 
+/**
+ * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD
+ *                       upon the reception of an ABTS for a FCP command
+ *
+ * Notify the transport that an ABTS has been received for a FCP command
+ * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The
+ * LLDD believes the command is still being worked on
+ * (template_ops->fcp_req_release() has not been called).
+ *
+ * The transport will wait for any outstanding work (an op to the LLDD,
+ * which the lldd should complete with error due to the ABTS; or the
+ * completion from the nvmet layer of the nvme command), then will
+ * stop processing and call the nvmet_fc_rcv_fcp_req() callback to
+ * return the i/o context to the LLDD.  The LLDD may send the BA_ACC
+ * to the ABTS either after return from this function (assuming any
+ * outstanding op work has been terminated) or upon the callback being
+ * called.
+ *
+ * @target_port: pointer to the (registered) target port the FCP CMD IU
+ *              was received on.
+ * @fcpreq:     pointer to the fcpreq request structure that corresponds
+ *              to the exchange that received the ABTS.
+ */
+void
+nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port,
+			struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+	struct nvmet_fc_tgt_queue *queue;
+	unsigned long flags;
+
+	if (!fod || fod->fcpreq != fcpreq)
+		/* job appears to have already completed, ignore abort */
+		return;
+
+	queue = fod->queue;
+
+	spin_lock_irqsave(&queue->qlock, flags);
+	if (fod->active) {
+		/*
+		 * mark as abort. The abort handler, invoked upon completion
+		 * of any work, will detect the aborted status and do the
+		 * callback.
+		 */
+		spin_lock(&fod->flock);
+		fod->abort = true;
+		fod->aborted = true;
+		spin_unlock(&fod->flock);
+	}
+	spin_unlock_irqrestore(&queue->qlock, flags);
+}
+EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort);
+
 enum {
 	FCT_TRADDR_ERR		= 0,
 	FCT_TRADDR_WWNN		= 1 << 0,
@@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf)
 	if (!options)
 		return -ENOMEM;
 
-	while ((p = strsep(&o, ",\n")) != NULL) {
+	while ((p = strsep(&o, ":\n")) != NULL) {
 		if (!*p)
 			continue;
 
@@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port)
 			if (!tgtport->port) {
 				tgtport->port = port;
 				port->priv = tgtport;
+				nvmet_fc_tgtport_get(tgtport);
 				ret = 0;
 			} else
 				ret = -EALREADY;
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 4e8e6a22bce1..15551ef79c8c 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -246,11 +246,19 @@ struct fcloop_lsreq {
 struct fcloop_fcpreq {
 	struct fcloop_tport		*tport;
 	struct nvmefc_fcp_req		*fcpreq;
+	spinlock_t			reqlock;
 	u16				status;
+	bool				active;
+	bool				aborted;
 	struct work_struct		work;
 	struct nvmefc_tgt_fcp_req	tgt_fcp_req;
 };
 
+struct fcloop_ini_fcpreq {
+	struct nvmefc_fcp_req		*fcpreq;
+	struct fcloop_fcpreq		*tfcp_req;
+	struct work_struct		iniwork;
+};
 
 static inline struct fcloop_lsreq *
 tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
@@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
 }
 
 /*
- * FCP IO operation done. call back up initiator "done" flows.
+ * FCP IO operation done by initiator abort.
+ * call back up initiator "done" flows.
+ */
+static void
+fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work)
+{
+	struct fcloop_ini_fcpreq *inireq =
+		container_of(work, struct fcloop_ini_fcpreq, iniwork);
+
+	inireq->fcpreq->done(inireq->fcpreq);
+}
+
+/*
+ * FCP IO operation done by target completion.
+ * call back up initiator "done" flows.
  */
 static void
 fcloop_tgt_fcprqst_done_work(struct work_struct *work)
@@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
 	struct fcloop_fcpreq *tfcp_req =
 		container_of(work, struct fcloop_fcpreq, work);
 	struct fcloop_tport *tport = tfcp_req->tport;
-	struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+	struct nvmefc_fcp_req *fcpreq;
 
-	if (tport->remoteport) {
+	spin_lock(&tfcp_req->reqlock);
+	fcpreq = tfcp_req->fcpreq;
+	spin_unlock(&tfcp_req->reqlock);
+
+	if (tport->remoteport && fcpreq) {
 		fcpreq->status = tfcp_req->status;
 		fcpreq->done(fcpreq);
 	}
+
+	kfree(tfcp_req);
 }
 
 
@@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
 			void *hw_queue_handle,
 			struct nvmefc_fcp_req *fcpreq)
 {
-	struct fcloop_fcpreq *tfcp_req = fcpreq->private;
 	struct fcloop_rport *rport = remoteport->private;
+	struct fcloop_ini_fcpreq *inireq = fcpreq->private;
+	struct fcloop_fcpreq *tfcp_req;
 	int ret = 0;
 
-	INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work);
+	if (!rport->targetport)
+		return -ECONNREFUSED;
 
-	if (!rport->targetport) {
-		tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR;
-		schedule_work(&tfcp_req->work);
-		return ret;
-	}
+	tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL);
+	if (!tfcp_req)
+		return -ENOMEM;
 
+	inireq->fcpreq = fcpreq;
+	inireq->tfcp_req = tfcp_req;
+	INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work);
 	tfcp_req->fcpreq = fcpreq;
 	tfcp_req->tport = rport->targetport->private;
+	spin_lock_init(&tfcp_req->reqlock);
+	INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work);
 
 	ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req,
 				 fcpreq->cmdaddr, fcpreq->cmdlen);
@@ -444,63 +477,129 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
 			struct nvmefc_tgt_fcp_req *tgt_fcpreq)
 {
 	struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
-	struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+	struct nvmefc_fcp_req *fcpreq;
 	u32 rsplen = 0, xfrlen = 0;
-	int fcp_err = 0;
+	int fcp_err = 0, active, aborted;
 	u8 op = tgt_fcpreq->op;
 
+	spin_lock(&tfcp_req->reqlock);
+	fcpreq = tfcp_req->fcpreq;
+	active = tfcp_req->active;
+	aborted = tfcp_req->aborted;
+	tfcp_req->active = true;
+	spin_unlock(&tfcp_req->reqlock);
+
+	if (unlikely(active))
+		/* illegal - call while i/o active */
+		return -EALREADY;
+
+	if (unlikely(aborted)) {
+		/* target transport has aborted i/o prior */
+		spin_lock(&tfcp_req->reqlock);
+		tfcp_req->active = false;
+		spin_unlock(&tfcp_req->reqlock);
+		tgt_fcpreq->transferred_length = 0;
+		tgt_fcpreq->fcp_error = -ECANCELED;
+		tgt_fcpreq->done(tgt_fcpreq);
+		return 0;
+	}
+
+	/*
+	 * if fcpreq is NULL, the I/O has been aborted (from
+	 * initiator side). For the target side, act as if all is well
+	 * but don't actually move data.
+	 */
+
 	switch (op) {
 	case NVMET_FCOP_WRITEDATA:
 		xfrlen = tgt_fcpreq->transfer_length;
-		fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl,
-					tgt_fcpreq->offset, xfrlen);
-		fcpreq->transferred_length += xfrlen;
+		if (fcpreq) {
+			fcloop_fcp_copy_data(op, tgt_fcpreq->sg,
+					fcpreq->first_sgl, tgt_fcpreq->offset,
+					xfrlen);
+			fcpreq->transferred_length += xfrlen;
+		}
 		break;
 
 	case NVMET_FCOP_READDATA:
 	case NVMET_FCOP_READDATA_RSP:
 		xfrlen = tgt_fcpreq->transfer_length;
-		fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl,
-					tgt_fcpreq->offset, xfrlen);
-		fcpreq->transferred_length += xfrlen;
+		if (fcpreq) {
+			fcloop_fcp_copy_data(op, tgt_fcpreq->sg,
+					fcpreq->first_sgl, tgt_fcpreq->offset,
+					xfrlen);
+			fcpreq->transferred_length += xfrlen;
+		}
 		if (op == NVMET_FCOP_READDATA)
 			break;
 
 		/* Fall-Thru to RSP handling */
 
 	case NVMET_FCOP_RSP:
-		rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ?
-				fcpreq->rsplen : tgt_fcpreq->rsplen);
-		memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen);
-		if (rsplen < tgt_fcpreq->rsplen)
-			fcp_err = -E2BIG;
-		fcpreq->rcv_rsplen = rsplen;
-		fcpreq->status = 0;
+		if (fcpreq) {
+			rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ?
+					fcpreq->rsplen : tgt_fcpreq->rsplen);
+			memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen);
+			if (rsplen < tgt_fcpreq->rsplen)
+				fcp_err = -E2BIG;
+			fcpreq->rcv_rsplen = rsplen;
+			fcpreq->status = 0;
+		}
 		tfcp_req->status = 0;
 		break;
 
-	case NVMET_FCOP_ABORT:
-		tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
-		break;
-
 	default:
 		fcp_err = -EINVAL;
 		break;
 	}
 
+	spin_lock(&tfcp_req->reqlock);
+	tfcp_req->active = false;
+	spin_unlock(&tfcp_req->reqlock);
+
 	tgt_fcpreq->transferred_length = xfrlen;
 	tgt_fcpreq->fcp_error = fcp_err;
 	tgt_fcpreq->done(tgt_fcpreq);
 
-	if ((!fcp_err) && (op == NVMET_FCOP_RSP ||
-			op == NVMET_FCOP_READDATA_RSP ||
-			op == NVMET_FCOP_ABORT))
-		schedule_work(&tfcp_req->work);
-
 	return 0;
 }
 
 static void
+fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_fcp_req *tgt_fcpreq)
+{
+	struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+	int active;
+
+	/*
+	 * mark aborted only in case there were 2 threads in transport
+	 * (one doing io, other doing abort) and only kills ops posted
+	 * after the abort request
+	 */
+	spin_lock(&tfcp_req->reqlock);
+	active = tfcp_req->active;
+	tfcp_req->aborted = true;
+	spin_unlock(&tfcp_req->reqlock);
+
+	tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
+
+	/*
+	 * nothing more to do. If io wasn't active, the transport should
+	 * immediately call the req_release. If it was active, the op
+	 * will complete, and the lldd should call req_release.
+	 */
+}
+
+static void
+fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
+			struct nvmefc_tgt_fcp_req *tgt_fcpreq)
+{
+	struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+
+	schedule_work(&tfcp_req->work);
+}
+
+static void
 fcloop_ls_abort(struct nvme_fc_local_port *localport,
 			struct nvme_fc_remote_port *remoteport,
 				struct nvmefc_ls_req *lsreq)
@@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
 			void *hw_queue_handle,
 			struct nvmefc_fcp_req *fcpreq)
 {
+	struct fcloop_rport *rport = remoteport->private;
+	struct fcloop_ini_fcpreq *inireq = fcpreq->private;
+	struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req;
+
+	if (!tfcp_req)
+		/* abort has already been called */
+		return;
+
+	if (rport->targetport)
+		nvmet_fc_rcv_fcp_abort(rport->targetport,
+					&tfcp_req->tgt_fcp_req);
+
+	/* break initiator/target relationship for io */
+	spin_lock(&tfcp_req->reqlock);
+	inireq->tfcp_req = NULL;
+	tfcp_req->fcpreq = NULL;
+	spin_unlock(&tfcp_req->reqlock);
+
+	/* post the aborted io completion */
+	fcpreq->status = -ECANCELED;
+	schedule_work(&inireq->iniwork);
 }
 
 static void
@@ -546,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 #define	FCLOOP_SGL_SEGS			256
 #define FCLOOP_DMABOUND_4G		0xFFFFFFFF
 
-struct nvme_fc_port_template fctemplate = {
+static struct nvme_fc_port_template fctemplate = {
 	.localport_delete	= fcloop_localport_delete,
 	.remoteport_delete	= fcloop_remoteport_delete,
 	.create_queue		= fcloop_create_queue,
@@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = {
 	.local_priv_sz		= sizeof(struct fcloop_lport),
 	.remote_priv_sz		= sizeof(struct fcloop_rport),
 	.lsrqst_priv_sz		= sizeof(struct fcloop_lsreq),
-	.fcprqst_priv_sz	= sizeof(struct fcloop_fcpreq),
+	.fcprqst_priv_sz	= sizeof(struct fcloop_ini_fcpreq),
 };
 
-struct nvmet_fc_target_template tgttemplate = {
+static struct nvmet_fc_target_template tgttemplate = {
 	.targetport_delete	= fcloop_targetport_delete,
 	.xmt_ls_rsp		= fcloop_xmt_ls_rsp,
 	.fcp_op			= fcloop_fcp_op,
+	.fcp_abort		= fcloop_tgt_fcp_abort,
+	.fcp_req_release	= fcloop_fcp_req_release,
 	.max_hw_queues		= FCLOOP_HW_QUEUES,
 	.max_sgl_segments	= FCLOOP_SGL_SEGS,
 	.max_dif_sgl_segments	= FCLOOP_SGL_SEGS,
 	.dma_boundary		= FCLOOP_DMABOUND_4G,
 	/* optional features */
-	.target_features	= NVMET_FCTGTFEAT_READDATA_RSP |
-				  NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED,
+	.target_features	= NVMET_FCTGTFEAT_CMD_IN_ISR |
+				  NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
+				  NVMET_FCTGTFEAT_OPDONE_IN_ISR,
 	/* sizes of additional private data for data structures */
 	.target_priv_sz		= sizeof(struct fcloop_tport),
 };
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 6b0baa9caab9..c77940d80fc8 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -184,7 +184,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
 		(req->ns->blksize_shift - 9)) + 1;
 
 	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
-				GFP_KERNEL, &bio, true))
+				GFP_KERNEL, &bio, 0))
 		status = NVME_SC_INTERNAL | NVME_SC_DNR;
 
 	if (bio) {
@@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
 	}
 }
 
-int nvmet_parse_io_cmd(struct nvmet_req *req)
+u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
+	u16 ret;
 
-	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
-		pr_err("nvmet: got io cmd %d while CC.EN == 0\n",
-				cmd->common.opcode);
+	ret = nvmet_check_ctrl_status(req, cmd);
+	if (unlikely(ret)) {
 		req->ns = NULL;
-		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
-	}
-
-	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-		pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n",
-				cmd->common.opcode);
-		req->ns = NULL;
-		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+		return ret;
 	}
 
 	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
-	if (!req->ns)
+	if (unlikely(!req->ns))
 		return NVME_SC_INVALID_NS | NVME_SC_DNR;
 
 	switch (cmd->common.opcode) {
@@ -237,7 +230,8 @@ int nvmet_parse_io_cmd(struct nvmet_req *req)
 		req->execute = nvmet_execute_write_zeroes;
 		return 0;
 	default:
-		pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
+		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+		       req->sq->qid);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 }
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index c7b0b6a52708..304f1c87c160 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -13,12 +13,10 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/scatterlist.h>
-#include <linux/delay.h>
 #include <linux/blk-mq.h>
 #include <linux/nvme.h>
 #include <linux/module.h>
 #include <linux/parser.h>
-#include <linux/t10-pi.h>
 #include "nvmet.h"
 #include "../host/nvme.h"
 #include "../host/fabrics.h"
@@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
 static void nvme_loop_complete_rq(struct request *req)
 {
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-	int error = 0;
 
 	nvme_cleanup_cmd(req);
 	sg_free_table_chained(&iod->sg_table, true);
+	nvme_complete_rq(req);
+}
 
-	if (unlikely(req->errors)) {
-		if (nvme_req_needs_retry(req, req->errors)) {
-			nvme_requeue_req(req);
-			return;
-		}
-
-		if (blk_rq_is_passthrough(req))
-			error = req->errors;
-		else
-			error = nvme_error_status(req->errors);
-	}
+static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue)
+{
+	u32 queue_idx = nvme_loop_queue_idx(queue);
 
-	blk_mq_end_request(req, error);
+	if (queue_idx == 0)
+		return queue->ctrl->admin_tag_set.tags[queue_idx];
+	return queue->ctrl->tag_set.tags[queue_idx - 1];
 }
 
 static void nvme_loop_queue_response(struct nvmet_req *req)
 {
-	struct nvme_loop_iod *iod =
-		container_of(req, struct nvme_loop_iod, req);
-	struct nvme_completion *cqe = &iod->rsp;
+	struct nvme_loop_queue *queue =
+		container_of(req->sq, struct nvme_loop_queue, nvme_sq);
+	struct nvme_completion *cqe = req->rsp;
 
 	/*
 	 * AEN requests are special as they don't time out and can
@@ -125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 	 * aborts.  We don't even bother to allocate a struct request
 	 * for them but rather special case them here.
 	 */
-	if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 &&
+	if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
 			cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
-		nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status,
+		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
 				&cqe->result);
 	} else {
-		struct request *rq = blk_mq_rq_from_pdu(iod);
+		struct request *rq;
+
+		rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+		if (!rq) {
+			dev_err(queue->ctrl->ctrl.device,
+				"tag 0x%x on queue %d not found\n",
+				cqe->command_id, nvme_loop_queue_idx(queue));
+			return;
+		}
 
-		iod->nvme_req.result = cqe->result;
-		blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
+		nvme_end_request(rq, cqe->status, cqe->result);
 	}
 }
 
@@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	schedule_work(&iod->queue->ctrl->reset_work);
 
 	/* fail with DNR on admin cmd timeout */
-	rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
 
 	return BLK_EH_HANDLED;
 }
@@ -268,7 +268,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	return 0;
 }
 
-static struct blk_mq_ops nvme_loop_mq_ops = {
+static const struct blk_mq_ops nvme_loop_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
 	.init_request	= nvme_loop_init_request,
@@ -276,7 +276,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = {
 	.timeout	= nvme_loop_timeout,
 };
 
-static struct blk_mq_ops nvme_loop_admin_mq_ops = {
+static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
 	.init_request	= nvme_loop_init_admin_request,
@@ -349,6 +349,19 @@ out_destroy_queues:
 	return ret;
 }
 
+static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+	int i, ret;
+
+	for (i = 1; i < ctrl->queue_count; i++) {
+		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
 	int error;
@@ -490,7 +503,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 					struct nvme_loop_ctrl, reset_work);
 	bool changed;
-	int i, ret;
+	int ret;
 
 	nvme_loop_shutdown_ctrl(ctrl);
 
@@ -502,11 +515,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto out_destroy_admin;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
-		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
-		if (ret)
-			goto out_destroy_io;
-	}
+	ret = nvme_loop_connect_io_queues(ctrl);
+	if (ret)
+		goto out_destroy_io;
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
@@ -559,7 +570,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 {
-	int ret, i;
+	int ret;
 
 	ret = nvme_loop_init_io_queues(ctrl);
 	if (ret)
@@ -588,11 +599,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 		goto out_free_tagset;
 	}
 
-	for (i = 1; i < ctrl->queue_count; i++) {
-		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
-		if (ret)
-			goto out_cleanup_connect_q;
-	}
+	ret = nvme_loop_connect_io_queues(ctrl);
+	if (ret)
+		goto out_cleanup_connect_q;
 
 	return 0;
 
@@ -736,7 +745,12 @@ static int __init nvme_loop_init_module(void)
 	ret = nvmet_register_transport(&nvme_loop_ops);
 	if (ret)
 		return ret;
-	return nvmf_register_transport(&nvme_loop_transport);
+
+	ret = nvmf_register_transport(&nvme_loop_transport);
+	if (ret)
+		nvmet_unregister_transport(&nvme_loop_ops);
+
+	return ret;
 }
 
 static void __exit nvme_loop_cleanup_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index f7ff15f17ca9..7cb77ba5993b 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -253,11 +253,11 @@ struct nvmet_async_event {
 	u8			log_page;
 };
 
-int nvmet_parse_connect_cmd(struct nvmet_req *req);
-int nvmet_parse_io_cmd(struct nvmet_req *req);
-int nvmet_parse_admin_cmd(struct nvmet_req *req);
-int nvmet_parse_discovery_cmd(struct nvmet_req *req);
-int nvmet_parse_fabrics_cmd(struct nvmet_req *req);
+u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
+u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
+u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
+u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
@@ -278,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 		struct nvmet_req *req, struct nvmet_ctrl **ret);
 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
+u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd);
 
 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 		enum nvme_subsys_type type);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ecc4fe862561..99c69018a35f 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1199,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 	}
 	queue->port = cm_id->context;
 
+	if (queue->host_qid == 0) {
+		/* Let inflight controller teardown complete */
+		flush_scheduled_work();
+	}
+
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret)
 		goto release_queue;
@@ -1427,12 +1432,16 @@ restart:
 static int nvmet_rdma_add_port(struct nvmet_port *port)
 {
 	struct rdma_cm_id *cm_id;
-	struct sockaddr_in addr_in;
-	u16 port_in;
+	struct sockaddr_storage addr = { };
+	__kernel_sa_family_t af;
 	int ret;
 
 	switch (port->disc_addr.adrfam) {
 	case NVMF_ADDR_FAMILY_IP4:
+		af = AF_INET;
+		break;
+	case NVMF_ADDR_FAMILY_IP6:
+		af = AF_INET6;
 		break;
 	default:
 		pr_err("address family %d not supported\n",
@@ -1440,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		return -EINVAL;
 	}
 
-	ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in);
-	if (ret)
+	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
+			port->disc_addr.trsvcid, &addr);
+	if (ret) {
+		pr_err("malformed ip/port passed: %s:%s\n",
+			port->disc_addr.traddr, port->disc_addr.trsvcid);
 		return ret;
-
-	addr_in.sin_family = AF_INET;
-	addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr);
-	addr_in.sin_port = htons(port_in);
+	}
 
 	cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -1455,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
 		return PTR_ERR(cm_id);
 	}
 
-	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in);
+	/*
+	 * Allow both IPv4 and IPv6 sockets to bind a single port
+	 * at the same time.
+	 */
+	ret = rdma_set_afonly(cm_id, 1);
+	if (ret) {
+		pr_err("rdma_set_afonly failed (%d)\n", ret);
+		goto out_destroy_id;
+	}
+
+	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
 	if (ret) {
-		pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret);
+		pr_err("binding CM ID to %pISpcs failed (%d)\n",
+			(struct sockaddr *)&addr, ret);
 		goto out_destroy_id;
 	}
 
 	ret = rdma_listen(cm_id, 128);
 	if (ret) {
-		pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret);
+		pr_err("listening to %pISpcs failed (%d)\n",
+			(struct sockaddr *)&addr, ret);
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpc)\n",
-		le16_to_cpu(port->disc_addr.portid), &addr_in);
+	pr_info("enabling port %d (%pISpcs)\n",
+		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
 	port->priv = cm_id;
 	return 0;
 
diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c
index fd66a3199db7..cf9d6a9d9fd4 100644
--- a/drivers/pci/dwc/pcie-hisi.c
+++ b/drivers/pci/dwc/pcie-hisi.c
@@ -380,9 +380,13 @@ struct pci_ecam_ops hisi_pcie_platform_ops = {
 
 static const struct of_device_id hisi_pcie_almost_ecam_of_match[] = {
 	{
-		.compatible = "hisilicon,pcie-almost-ecam",
+		.compatible =  "hisilicon,hip06-pcie-ecam",
 		.data	    = (void *) &hisi_pcie_platform_ops,
 	},
+	{
+		.compatible =  "hisilicon,hip07-pcie-ecam",
+		.data       = (void *) &hisi_pcie_platform_ops,
+	},
 	{},
 };
 
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 4bc88eb52712..e1bffc9bb194 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -141,6 +141,14 @@ config DELL_WMI_AIO
 	  To compile this driver as a module, choose M here: the module will
 	  be called dell-wmi-aio.
 
+config DELL_WMI_LED
+	tristate "External LED on Dell Business Netbooks"
+	depends on LEDS_CLASS
+	depends on ACPI_WMI
+	help
+	  This adds support for the Latitude 2100 and similar
+	  notebooks that have an external LED.
+
 config DELL_SMO8800
 	tristate "Dell Latitude freefall driver (ACPI SMO88XX)"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 299d0f9e40f7..776b3a7a4984 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_DELL_SMBIOS)	+= dell-smbios.o
 obj-$(CONFIG_DELL_LAPTOP)	+= dell-laptop.o
 obj-$(CONFIG_DELL_WMI)		+= dell-wmi.o
 obj-$(CONFIG_DELL_WMI_AIO)	+= dell-wmi-aio.o
+obj-$(CONFIG_DELL_WMI_LED)	+= dell-wmi-led.o
 obj-$(CONFIG_DELL_SMO8800)	+= dell-smo8800.o
 obj-$(CONFIG_DELL_RBTN)		+= dell-rbtn.o
 obj-$(CONFIG_ACER_WMI)		+= acer-wmi.o
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index f57dd282a002..2e237bad4995 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/i8042.h>
 #include <linux/debugfs.h>
+#include <linux/dell-led.h>
 #include <linux/seq_file.h>
 #include <acpi/video.h>
 #include "dell-rbtn.h"
@@ -42,6 +43,8 @@
 #define KBD_LED_AUTO_50_TOKEN 0x02EB
 #define KBD_LED_AUTO_75_TOKEN 0x02EC
 #define KBD_LED_AUTO_100_TOKEN 0x02F6
+#define GLOBAL_MIC_MUTE_ENABLE 0x0364
+#define GLOBAL_MIC_MUTE_DISABLE 0x0365
 
 struct quirk_entry {
 	u8 touchpad_led;
@@ -1978,6 +1981,31 @@ static void kbd_led_exit(void)
 	led_classdev_unregister(&kbd_led);
 }
 
+int dell_micmute_led_set(int state)
+{
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
+
+	if (state == 0)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE);
+	else if (state == 1)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE);
+	else
+		return -EINVAL;
+
+	if (!token)
+		return -ENODEV;
+
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	buffer->input[1] = token->value;
+	dell_smbios_send_request(1, 0);
+	dell_smbios_release_buffer();
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(dell_micmute_led_set);
+
 static int __init dell_init(void)
 {
 	struct calling_interface_buffer *buffer;
diff --git a/drivers/leds/dell-led.c b/drivers/platform/x86/dell-wmi-led.c
index b3d6e9c15cf9..a0c7e99530ef 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/platform/x86/dell-wmi-led.c
@@ -1,6 +1,4 @@
 /*
- * dell_led.c - Dell LED Driver
- *
  * Copyright (C) 2010 Dell Inc.
  * Louis Davis <louis_davis@dell.com>
  * Jim Dailey <jim_dailey@dell.com>
@@ -15,16 +13,12 @@
 #include <linux/leds.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/dell-led.h>
-#include "../platform/x86/dell-smbios.h"
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
 MODULE_LICENSE("GPL");
 
 #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396"
-#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492"
 MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 
 /* Error Result Codes: */
@@ -43,53 +37,6 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
-#define GLOBAL_MIC_MUTE_ENABLE	0x364
-#define GLOBAL_MIC_MUTE_DISABLE	0x365
-
-static int dell_micmute_led_set(int state)
-{
-	struct calling_interface_buffer *buffer;
-	struct calling_interface_token *token;
-
-	if (!wmi_has_guid(DELL_APP_GUID))
-		return -ENODEV;
-
-	if (state == 0)
-		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE);
-	else if (state == 1)
-		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE);
-	else
-		return -EINVAL;
-
-	if (!token)
-		return -ENODEV;
-
-	buffer = dell_smbios_get_buffer();
-	buffer->input[0] = token->location;
-	buffer->input[1] = token->value;
-	dell_smbios_send_request(1, 0);
-	dell_smbios_release_buffer();
-
-	return state;
-}
-
-int dell_app_wmi_led_set(int whichled, int on)
-{
-	int state = 0;
-
-	switch (whichled) {
-	case DELL_LED_MICMUTE:
-		state = dell_micmute_led_set(on);
-		break;
-	default:
-		pr_warn("led type %x is not supported\n", whichled);
-		break;
-	}
-
-	return state;
-}
-EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
-
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -99,37 +46,29 @@ struct bios_args {
 	unsigned char off_time;
 };
 
-static int dell_led_perform_fn(u8 length,
-		u8 result_code,
-		u8 device_id,
-		u8 command,
-		u8 on_time,
-		u8 off_time)
+static int dell_led_perform_fn(u8 length, u8 result_code, u8 device_id,
+			       u8 command, u8 on_time, u8 off_time)
 {
-	struct bios_args *bios_return;
-	u8 return_code;
-	union acpi_object *obj;
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct bios_args *bios_return;
 	struct acpi_buffer input;
+	union acpi_object *obj;
 	acpi_status status;
+	u8 return_code;
 
-	struct bios_args args;
-	args.length = length;
-	args.result_code = result_code;
-	args.device_id = device_id;
-	args.command = command;
-	args.on_time = on_time;
-	args.off_time = off_time;
+	struct bios_args args = {
+		.length = length,
+		.result_code = result_code,
+		.device_id = device_id,
+		.command = command,
+		.on_time = on_time,
+		.off_time = off_time
+	};
 
 	input.length = sizeof(struct bios_args);
 	input.pointer = &args;
 
-	status = wmi_evaluate_method(DELL_LED_BIOS_GUID,
-		1,
-		1,
-		&input,
-		&output);
-
+	status = wmi_evaluate_method(DELL_LED_BIOS_GUID, 1, 1, &input, &output);
 	if (ACPI_FAILURE(status))
 		return status;
 
@@ -137,7 +76,7 @@ static int dell_led_perform_fn(u8 length,
 
 	if (!obj)
 		return -EINVAL;
-	else if (obj->type != ACPI_TYPE_BUFFER) {
+	if (obj->type != ACPI_TYPE_BUFFER) {
 		kfree(obj);
 		return -EINVAL;
 	}
@@ -170,8 +109,7 @@ static int led_off(void)
 		0);			/* not used */
 }
 
-static int led_blink(unsigned char on_eighths,
-		unsigned char off_eighths)
+static int led_blink(unsigned char on_eighths, unsigned char off_eighths)
 {
 	return dell_led_perform_fn(5,	/* Length of command */
 		INTERFACE_ERROR,	/* Init to  INTERFACE_ERROR */
@@ -182,7 +120,7 @@ static int led_blink(unsigned char on_eighths,
 }
 
 static void dell_led_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
+			 enum led_brightness value)
 {
 	if (value == LED_OFF)
 		led_off();
@@ -191,27 +129,22 @@ static void dell_led_set(struct led_classdev *led_cdev,
 }
 
 static int dell_led_blink(struct led_classdev *led_cdev,
-		unsigned long *delay_on,
-		unsigned long *delay_off)
+			  unsigned long *delay_on, unsigned long *delay_off)
 {
 	unsigned long on_eighths;
 	unsigned long off_eighths;
 
-	/* The Dell LED delay is based on 125ms intervals.
-	   Need to round up to next interval. */
+	/*
+	 * The Dell LED delay is based on 125ms intervals.
+	 * Need to round up to next interval.
+	 */
 
-	on_eighths = (*delay_on + 124) / 125;
-	if (0 == on_eighths)
-		on_eighths = 1;
-	if (on_eighths > 255)
-		on_eighths = 255;
+	on_eighths = DIV_ROUND_UP(*delay_on, 125);
+	on_eighths = clamp_t(unsigned long, on_eighths, 1, 255);
 	*delay_on = on_eighths * 125;
 
-	off_eighths = (*delay_off + 124) / 125;
-	if (0 == off_eighths)
-		off_eighths = 1;
-	if (off_eighths > 255)
-		off_eighths = 255;
+	off_eighths = DIV_ROUND_UP(*delay_off, 125);
+	off_eighths = clamp_t(unsigned long, off_eighths, 1, 255);
 	*delay_off = off_eighths * 125;
 
 	led_blink(on_eighths, off_eighths);
@@ -232,29 +165,21 @@ static int __init dell_led_init(void)
 {
 	int error = 0;
 
-	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
+	if (!wmi_has_guid(DELL_LED_BIOS_GUID))
 		return -ENODEV;
 
-	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
-		error = led_off();
-		if (error != 0)
-			return -ENODEV;
-
-		error = led_classdev_register(NULL, &dell_led);
-	}
+	error = led_off();
+	if (error != 0)
+		return -ENODEV;
 
-	return error;
+	return led_classdev_register(NULL, &dell_led);
 }
 
 static void __exit dell_led_exit(void)
 {
-	int error = 0;
+	led_classdev_unregister(&dell_led);
 
-	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
-		error = led_off();
-		if (error == 0)
-			led_classdev_unregister(&dell_led);
-	}
+	led_off();
 }
 
 module_init(dell_led_init);
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 56bce1908be2..85812521b6ba 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -43,6 +43,10 @@
 #define RK3288_SOC_CON2_FLASH0		BIT(7)
 #define RK3288_SOC_FLASH_SUPPLY_NUM	2
 
+#define RK3328_SOC_CON4			0x410
+#define RK3328_SOC_CON4_VCCIO2		BIT(7)
+#define RK3328_SOC_VCCIO2_SUPPLY_NUM	1
+
 #define RK3368_SOC_CON15		0x43c
 #define RK3368_SOC_CON15_FLASH0		BIT(14)
 #define RK3368_SOC_FLASH_SUPPLY_NUM	2
@@ -166,6 +170,25 @@ static void rk3288_iodomain_init(struct rockchip_iodomain *iod)
 		dev_warn(iod->dev, "couldn't update flash0 ctrl\n");
 }
 
+static void rk3328_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no vccio2 supply we should leave things alone */
+	if (!iod->supplies[RK3328_SOC_VCCIO2_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set vccio2 iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = RK3328_SOC_CON4_VCCIO2 | (RK3328_SOC_CON4_VCCIO2 << 16);
+	ret = regmap_write(iod->grf, RK3328_SOC_CON4, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update vccio2 vsel ctrl\n");
+}
+
 static void rk3368_iodomain_init(struct rockchip_iodomain *iod)
 {
 	int ret;
@@ -247,6 +270,20 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3288 = {
 	.init = rk3288_iodomain_init,
 };
 
+static const struct rockchip_iodomain_soc_data soc_data_rk3328 = {
+	.grf_offset = 0x410,
+	.supply_names = {
+		"vccio1",
+		"vccio2",
+		"vccio3",
+		"vccio4",
+		"vccio5",
+		"vccio6",
+		"pmuio",
+	},
+	.init = rk3328_iodomain_init,
+};
+
 static const struct rockchip_iodomain_soc_data soc_data_rk3368 = {
 	.grf_offset = 0x900,
 	.supply_names = {
@@ -312,6 +349,10 @@ static const struct of_device_id rockchip_iodomain_match[] = {
 		.data = (void *)&soc_data_rk3288
 	},
 	{
+		.compatible = "rockchip,rk3328-io-voltage-domain",
+		.data = (void *)&soc_data_rk3328
+	},
+	{
 		.compatible = "rockchip,rk3368-io-voltage-domain",
 		.data = (void *)&soc_data_rk3368
 	},
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index b8cacccf18c8..13f1714cf6f7 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -67,6 +67,15 @@ config POWER_RESET_BRCMSTB
 	  Say Y here if you have a Broadcom STB board and you wish
 	  to have restart support.
 
+config POWER_RESET_GEMINI_POWEROFF
+	bool "Cortina Gemini power-off driver"
+	depends on ARCH_GEMINI || COMPILE_TEST
+	depends on OF && HAS_IOMEM
+	default ARCH_GEMINI
+	help
+	  This driver supports turning off the Cortina Gemini SoC.
+	  Select this if you're building a kernel with Gemini SoC support.
+
 config POWER_RESET_GPIO
 	bool "GPIO power-off driver"
 	depends on OF_GPIO
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index 11dae3b56ff9..58cf5b30559f 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_POWER_RESET_AT91_SAMA5D2_SHDWC) += at91-sama5d2_shdwc.o
 obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o
 obj-$(CONFIG_POWER_RESET_BRCMKONA) += brcm-kona-reset.o
 obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o
+obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF) += gemini-poweroff.o
 obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o
 obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o
 obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o
diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c
new file mode 100644
index 000000000000..de878fd26f27
--- /dev/null
+++ b/drivers/power/reset/gemini-poweroff.c
@@ -0,0 +1,160 @@
+/*
+ * Gemini power management controller
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Inspired by code from the SL3516 board support by Jason Lee
+ * Inspired by code from Janos Laube <janos.dev@gmail.com>
+ */
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/reboot.h>
+
+#define GEMINI_PWC_ID		0x00010500
+#define	GEMINI_PWC_IDREG	0x00
+#define	GEMINI_PWC_CTRLREG	0x04
+#define	GEMINI_PWC_STATREG	0x08
+
+#define GEMINI_CTRL_SHUTDOWN	BIT(0)
+#define GEMINI_CTRL_ENABLE	BIT(1)
+#define GEMINI_CTRL_IRQ_CLR	BIT(2)
+
+#define GEMINI_STAT_CIR		BIT(4)
+#define	GEMINI_STAT_RTC		BIT(5)
+#define	GEMINI_STAT_POWERBUTTON	BIT(6)
+
+struct gemini_powercon {
+        struct device           *dev;
+        void __iomem            *base;
+};
+
+static irqreturn_t gemini_powerbutton_interrupt(int irq, void *data)
+{
+	struct gemini_powercon *gpw = data;
+	u32 val;
+
+	/* ACK the IRQ */
+	val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+	val |= GEMINI_CTRL_IRQ_CLR;
+	writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+	val = readl(gpw->base + GEMINI_PWC_STATREG);
+	val &= 0x70U;
+	switch (val) {
+	case GEMINI_STAT_CIR:
+		dev_info(gpw->dev, "infrared poweroff\n");
+		orderly_poweroff(true);
+		break;
+	case GEMINI_STAT_RTC:
+		dev_info(gpw->dev, "RTC poweroff\n");
+		orderly_poweroff(true);
+		break;
+	case GEMINI_STAT_POWERBUTTON:
+		dev_info(gpw->dev, "poweroff button pressed\n");
+		orderly_poweroff(true);
+		break;
+	default:
+		dev_info(gpw->dev, "other power management IRQ\n");
+		break;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* This callback needs this static local as it has void as argument */
+static struct gemini_powercon *gpw_poweroff;
+
+static void gemini_poweroff(void)
+{
+	struct gemini_powercon *gpw = gpw_poweroff;
+	u32 val;
+
+	dev_crit(gpw->dev, "Gemini power off\n");
+	val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+	val |= GEMINI_CTRL_ENABLE | GEMINI_CTRL_IRQ_CLR;
+	writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+	val &= ~GEMINI_CTRL_ENABLE;
+	val |= GEMINI_CTRL_SHUTDOWN;
+	writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+}
+
+static int gemini_poweroff_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct gemini_powercon *gpw;
+	u32 val;
+	int irq;
+	int ret;
+
+	gpw = devm_kzalloc(dev, sizeof(*gpw), GFP_KERNEL);
+	if (!gpw)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	gpw->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(gpw->base))
+		return PTR_ERR(gpw->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (!irq)
+		return -EINVAL;
+
+	gpw->dev = dev;
+
+	val = readl(gpw->base + GEMINI_PWC_IDREG);
+	val &= 0xFFFFFF00U;
+	if (val != GEMINI_PWC_ID) {
+		dev_err(dev, "wrong power controller ID: %08x\n",
+			val);
+		return -ENODEV;
+	}
+
+	/* Clear the power management IRQ */
+	val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+	val |= GEMINI_CTRL_IRQ_CLR;
+	writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+	ret = devm_request_irq(dev, irq, gemini_powerbutton_interrupt, 0,
+			       "poweroff", gpw);
+	if (ret)
+		return ret;
+
+	pm_power_off = gemini_poweroff;
+	gpw_poweroff = gpw;
+
+	/*
+	 * Enable the power controller. This is crucial on Gemini
+	 * systems: if this is not done, pressing the power button
+	 * will result in unconditional poweroff without any warning.
+	 * This makes the kernel handle the poweroff.
+	 */
+	val = readl(gpw->base + GEMINI_PWC_CTRLREG);
+	val |= GEMINI_CTRL_ENABLE;
+	writel(val, gpw->base + GEMINI_PWC_CTRLREG);
+
+	dev_info(dev, "Gemini poweroff driver registered\n");
+
+	return 0;
+}
+
+static const struct of_device_id gemini_poweroff_of_match[] = {
+	{
+		.compatible = "cortina,gemini-power-controller",
+	},
+	{}
+};
+
+static struct platform_driver gemini_poweroff_driver = {
+	.probe = gemini_poweroff_probe,
+	.driver = {
+		.name = "gemini-poweroff",
+		.of_match_table = gemini_poweroff_of_match,
+	},
+};
+builtin_platform_driver(gemini_poweroff_driver);
diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c
index b68338399e5e..f9f1cb54fbf9 100644
--- a/drivers/power/reset/syscon-poweroff.c
+++ b/drivers/power/reset/syscon-poweroff.c
@@ -28,12 +28,13 @@
 
 static struct regmap *map;
 static u32 offset;
+static u32 value;
 static u32 mask;
 
 static void syscon_poweroff(void)
 {
 	/* Issue the poweroff */
-	regmap_write(map, offset, mask);
+	regmap_update_bits(map, offset, mask, value);
 
 	mdelay(1000);
 
@@ -43,6 +44,7 @@ static void syscon_poweroff(void)
 static int syscon_poweroff_probe(struct platform_device *pdev)
 {
 	char symname[KSYM_NAME_LEN];
+	int mask_err, value_err;
 
 	map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap");
 	if (IS_ERR(map)) {
@@ -55,11 +57,22 @@ static int syscon_poweroff_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (of_property_read_u32(pdev->dev.of_node, "mask", &mask)) {
-		dev_err(&pdev->dev, "unable to read 'mask'");
+	value_err = of_property_read_u32(pdev->dev.of_node, "value", &value);
+	mask_err = of_property_read_u32(pdev->dev.of_node, "mask", &mask);
+	if (value_err && mask_err) {
+		dev_err(&pdev->dev, "unable to read 'value' and 'mask'");
 		return -EINVAL;
 	}
 
+	if (value_err) {
+		/* support old binding */
+		value = mask;
+		mask = 0xFFFFFFFF;
+	} else if (mask_err) {
+		/* support value without mask*/
+		mask = 0xFFFFFFFF;
+	}
+
 	if (pm_power_off) {
 		lookup_symbol_name((ulong)pm_power_off, symname);
 		dev_err(&pdev->dev,
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index da54ac88f068..da922756149f 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -117,6 +117,12 @@ config BATTERY_DS2782
 	  Say Y here to enable support for the DS2782/DS2786 standalone battery
 	  gas-gauge.
 
+config BATTERY_LEGO_EV3
+	tristate "LEGO MINDSTORMS EV3 battery"
+	depends on OF && IIO && GPIOLIB
+	help
+	  Say Y here to enable support for the LEGO MINDSTORMS EV3 battery.
+
 config BATTERY_PMU
 	tristate "Apple PMU battery"
 	depends on PPC32 && ADB_PMU
@@ -317,6 +323,14 @@ config BATTERY_RX51
 	  Say Y here to enable support for battery information on Nokia
 	  RX-51, also known as N900 tablet.
 
+config CHARGER_CPCAP
+	tristate "CPCAP PMIC Charger Driver"
+	depends on MFD_CPCAP && IIO
+	default MFD_CPCAP
+	help
+	  Say Y to enable support for CPCAP PMIC charger driver for Motorola
+	  mobile devices such as Droid 4.
+
 config CHARGER_ISP1704
 	tristate "ISP1704 USB Charger Detection"
 	depends on USB_PHY
@@ -438,6 +452,7 @@ config CHARGER_BQ2415X
 config CHARGER_BQ24190
 	tristate "TI BQ24190 battery charger driver"
 	depends on I2C
+	depends on EXTCON
 	depends on GPIOLIB || COMPILE_TEST
 	help
 	  Say Y to enable support for the TI BQ24190 battery charger.
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile
index 3789a2c06fdf..39fc733e6cc4 100644
--- a/drivers/power/supply/Makefile
+++ b/drivers/power/supply/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_BATTERY_DS2781)	+= ds2781_battery.o
 obj-$(CONFIG_BATTERY_DS2782)	+= ds2782_battery.o
 obj-$(CONFIG_BATTERY_GAUGE_LTC2941)	+= ltc2941-battery-gauge.o
 obj-$(CONFIG_BATTERY_GOLDFISH)	+= goldfish_battery.o
+obj-$(CONFIG_BATTERY_LEGO_EV3)	+= lego_ev3_battery.o
 obj-$(CONFIG_BATTERY_PMU)	+= pmu_battery.o
 obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
@@ -51,6 +52,7 @@ obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
 obj-$(CONFIG_BATTERY_JZ4740)	+= jz4740-battery.o
 obj-$(CONFIG_BATTERY_RX51)	+= rx51_battery.o
 obj-$(CONFIG_AB8500_BM)		+= ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o pm2301_charger.o
+obj-$(CONFIG_CHARGER_CPCAP)	+= cpcap-charger.o
 obj-$(CONFIG_CHARGER_ISP1704)	+= isp1704_charger.o
 obj-$(CONFIG_CHARGER_MAX8903)	+= max8903_charger.o
 obj-$(CONFIG_CHARGER_TWL4030)	+= twl4030_charger.o
diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c
index d29864533093..8c49586015d0 100644
--- a/drivers/power/supply/ab8500_bmdata.c
+++ b/drivers/power/supply/ab8500_bmdata.c
@@ -430,10 +430,10 @@ static const struct abx500_maxim_parameters ab8500_maxi_params = {
 };
 
 static const struct abx500_maxim_parameters abx540_maxi_params = {
-        .ena_maxi = true,
-        .chg_curr = 3000,
-        .wait_cycles = 10,
-        .charger_curr_step = 200,
+	.ena_maxi = true,
+	.chg_curr = 3000,
+	.wait_cycles = 10,
+	.charger_curr_step = 200,
 };
 
 static const struct abx500_bm_charger_parameters chg = {
diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c
index 6be2fe27bb07..d51ebd1da65e 100644
--- a/drivers/power/supply/axp288_charger.c
+++ b/drivers/power/supply/axp288_charger.c
@@ -14,6 +14,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/regmap.h>
@@ -113,7 +114,8 @@
 #define ILIM_3000MA			3000	/* 3000mA */
 
 #define AXP288_EXTCON_DEV_NAME		"axp288_extcon"
-#define USB_HOST_EXTCON_DEV_NAME	"INT3496:00"
+#define USB_HOST_EXTCON_HID		"INT3496"
+#define USB_HOST_EXTCON_NAME		"INT3496:00"
 
 static const unsigned int cable_ids[] =
 	{ EXTCON_CHG_USB_SDP, EXTCON_CHG_USB_CDP, EXTCON_CHG_USB_DCP };
@@ -807,10 +809,14 @@ static int axp288_charger_probe(struct platform_device *pdev)
 		return -EPROBE_DEFER;
 	}
 
-	info->otg.cable = extcon_get_extcon_dev(USB_HOST_EXTCON_DEV_NAME);
-	if (info->otg.cable == NULL) {
-		dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n");
-		return -EPROBE_DEFER;
+	if (acpi_dev_present(USB_HOST_EXTCON_HID, NULL, -1)) {
+		info->otg.cable = extcon_get_extcon_dev(USB_HOST_EXTCON_NAME);
+		if (info->otg.cable == NULL) {
+			dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n");
+			return -EPROBE_DEFER;
+		}
+		dev_info(&pdev->dev,
+			 "Using " USB_HOST_EXTCON_HID " extcon for usb-id\n");
 	}
 
 	platform_set_drvdata(pdev, info);
@@ -849,13 +855,15 @@ static int axp288_charger_probe(struct platform_device *pdev)
 	/* Register for OTG notification */
 	INIT_WORK(&info->otg.work, axp288_charger_otg_evt_worker);
 	info->otg.id_nb.notifier_call = axp288_charger_handle_otg_evt;
-	ret = devm_extcon_register_notifier(&pdev->dev, info->otg.cable,
+	if (info->otg.cable) {
+		ret = devm_extcon_register_notifier(&pdev->dev, info->otg.cable,
 					EXTCON_USB_HOST, &info->otg.id_nb);
-	if (ret) {
-		dev_err(dev, "failed to register EXTCON_USB_HOST notifier\n");
-		return ret;
+		if (ret) {
+			dev_err(dev, "failed to register EXTCON_USB_HOST notifier\n");
+			return ret;
+		}
+		schedule_work(&info->otg.work);
 	}
-	schedule_work(&info->otg.work);
 
 	/* Register charger interrupts */
 	for (i = 0; i < CHRG_INTR_END; i++) {
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index a4f08492abeb..bd9e5c3d8cc2 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -11,16 +11,15 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/extcon.h>
 #include <linux/of_irq.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/power_supply.h>
+#include <linux/workqueue.h>
 #include <linux/gpio.h>
 #include <linux/i2c.h>
 
-#include <linux/power/bq24190_charger.h>
-
-
 #define	BQ24190_MANUFACTURER	"Texas Instruments"
 
 #define BQ24190_REG_ISC		0x00 /* Input Source Control */
@@ -39,6 +38,9 @@
 #define BQ24190_REG_POC_WDT_RESET_SHIFT		6
 #define BQ24190_REG_POC_CHG_CONFIG_MASK		(BIT(5) | BIT(4))
 #define BQ24190_REG_POC_CHG_CONFIG_SHIFT	4
+#define BQ24190_REG_POC_CHG_CONFIG_DISABLE		0x0
+#define BQ24190_REG_POC_CHG_CONFIG_CHARGE		0x1
+#define BQ24190_REG_POC_CHG_CONFIG_OTG			0x2
 #define BQ24190_REG_POC_SYS_MIN_MASK		(BIT(3) | BIT(2) | BIT(1))
 #define BQ24190_REG_POC_SYS_MIN_SHIFT		1
 #define BQ24190_REG_POC_BOOST_LIM_MASK		BIT(0)
@@ -151,10 +153,12 @@ struct bq24190_dev_info {
 	struct device			*dev;
 	struct power_supply		*charger;
 	struct power_supply		*battery;
+	struct extcon_dev		*extcon;
+	struct notifier_block		extcon_nb;
+	struct delayed_work		extcon_work;
 	char				model_name[I2C_NAME_SIZE];
-	kernel_ulong_t			model;
-	unsigned int			gpio_int;
-	unsigned int			irq;
+	bool				initialized;
+	bool				irq_event;
 	struct mutex			f_reg_lock;
 	u8				f_reg;
 	u8				ss_reg;
@@ -168,6 +172,12 @@ struct bq24190_dev_info {
  * number at that index in the array is the real-world value that it
  * represents.
  */
+
+/* REG00[2:0] (IINLIM) in uAh */
+static const int bq24190_isc_iinlim_values[] = {
+	 100000,  150000,  500000,  900000, 1200000, 1500000, 2000000, 3000000
+};
+
 /* REG02[7:2] (ICHG) in uAh */
 static const int bq24190_ccc_ichg_values[] = {
 	 512000,  576000,  640000,  704000,  768000,  832000,  896000,  960000,
@@ -418,6 +428,7 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy);
 	struct bq24190_sysfs_field_info *info;
+	ssize_t count;
 	int ret;
 	u8 v;
 
@@ -425,11 +436,20 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
 	if (!info)
 		return -EINVAL;
 
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
+
 	ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v);
 	if (ret)
-		return ret;
+		count = ret;
+	else
+		count = scnprintf(buf, PAGE_SIZE, "%hhx\n", v);
+
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
 
-	return scnprintf(buf, PAGE_SIZE, "%hhx\n", v);
+	return count;
 }
 
 static ssize_t bq24190_sysfs_store(struct device *dev,
@@ -449,9 +469,16 @@ static ssize_t bq24190_sysfs_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
+
 	ret = bq24190_write_mask(bdi, info->reg, info->mask, info->shift, v);
 	if (ret)
-		return ret;
+		count = ret;
+
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
 
 	return count;
 }
@@ -523,16 +550,13 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi)
 		if (ret < 0)
 			return ret;
 
-		if (!v)
-			break;
+		if (v == 0)
+			return 0;
 
-		udelay(10);
+		usleep_range(100, 200);
 	} while (--limit);
 
-	if (!limit)
-		return -EIO;
-
-	return 0;
+	return -EIO;
 }
 
 /* Charger power supply property routines */
@@ -793,7 +817,9 @@ static int bq24190_charger_get_property(struct power_supply *psy,
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
-	pm_runtime_get_sync(bdi->dev);
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
@@ -833,7 +859,9 @@ static int bq24190_charger_get_property(struct power_supply *psy,
 		ret = -ENODATA;
 	}
 
-	pm_runtime_put_sync(bdi->dev);
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
 	return ret;
 }
 
@@ -846,7 +874,9 @@ static int bq24190_charger_set_property(struct power_supply *psy,
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
-	pm_runtime_get_sync(bdi->dev);
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CHARGE_TYPE:
@@ -862,7 +892,9 @@ static int bq24190_charger_set_property(struct power_supply *psy,
 		ret = -EINVAL;
 	}
 
-	pm_runtime_put_sync(bdi->dev);
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
 	return ret;
 }
 
@@ -1063,7 +1095,9 @@ static int bq24190_battery_get_property(struct power_supply *psy,
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
-	pm_runtime_get_sync(bdi->dev);
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
@@ -1091,7 +1125,9 @@ static int bq24190_battery_get_property(struct power_supply *psy,
 		ret = -ENODATA;
 	}
 
-	pm_runtime_put_sync(bdi->dev);
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
 	return ret;
 }
 
@@ -1104,7 +1140,9 @@ static int bq24190_battery_set_property(struct power_supply *psy,
 
 	dev_dbg(bdi->dev, "prop: %d\n", psp);
 
-	pm_runtime_get_sync(bdi->dev);
+	ret = pm_runtime_get_sync(bdi->dev);
+	if (ret < 0)
+		return ret;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ONLINE:
@@ -1117,7 +1155,9 @@ static int bq24190_battery_set_property(struct power_supply *psy,
 		ret = -EINVAL;
 	}
 
-	pm_runtime_put_sync(bdi->dev);
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
 	return ret;
 }
 
@@ -1157,9 +1197,8 @@ static const struct power_supply_desc bq24190_battery_desc = {
 	.property_is_writeable	= bq24190_battery_property_is_writeable,
 };
 
-static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
+static void bq24190_check_status(struct bq24190_dev_info *bdi)
 {
-	struct bq24190_dev_info *bdi = data;
 	const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK;
 	const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK
 				| BQ24190_REG_F_NTC_FAULT_MASK;
@@ -1167,12 +1206,10 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
 	u8 ss_reg = 0, f_reg = 0;
 	int i, ret;
 
-	pm_runtime_get_sync(bdi->dev);
-
 	ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg);
 	if (ret < 0) {
 		dev_err(bdi->dev, "Can't read SS reg: %d\n", ret);
-		goto out;
+		return;
 	}
 
 	i = 0;
@@ -1180,12 +1217,17 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
 		ret = bq24190_read(bdi, BQ24190_REG_F, &f_reg);
 		if (ret < 0) {
 			dev_err(bdi->dev, "Can't read F reg: %d\n", ret);
-			goto out;
+			return;
 		}
 	} while (f_reg && ++i < 2);
 
+	/* ignore over/under voltage fault after disconnect */
+	if (f_reg == (1 << BQ24190_REG_F_CHRG_FAULT_SHIFT) &&
+	    !(ss_reg & BQ24190_REG_SS_PG_STAT_MASK))
+		f_reg = 0;
+
 	if (f_reg != bdi->f_reg) {
-		dev_info(bdi->dev,
+		dev_warn(bdi->dev,
 			"Fault: boost %d, charge %d, battery %d, ntc %d\n",
 			!!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK),
 			!!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK),
@@ -1229,90 +1271,126 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
 	if (alert_battery)
 		power_supply_changed(bdi->battery);
 
-out:
-	pm_runtime_put_sync(bdi->dev);
-
 	dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg);
+}
+
+static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
+{
+	struct bq24190_dev_info *bdi = data;
+	int error;
+
+	bdi->irq_event = true;
+	error = pm_runtime_get_sync(bdi->dev);
+	if (error < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+		pm_runtime_put_noidle(bdi->dev);
+		return IRQ_NONE;
+	}
+	bq24190_check_status(bdi);
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+	bdi->irq_event = false;
 
 	return IRQ_HANDLED;
 }
 
-static int bq24190_hw_init(struct bq24190_dev_info *bdi)
+static void bq24190_extcon_work(struct work_struct *work)
 {
+	struct bq24190_dev_info *bdi =
+		container_of(work, struct bq24190_dev_info, extcon_work.work);
+	int error, iinlim = 0;
 	u8 v;
-	int ret;
-
-	pm_runtime_get_sync(bdi->dev);
 
-	/* First check that the device really is what its supposed to be */
-	ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
-			BQ24190_REG_VPRS_PN_MASK,
-			BQ24190_REG_VPRS_PN_SHIFT,
-			&v);
-	if (ret < 0)
-		goto out;
+	error = pm_runtime_get_sync(bdi->dev);
+	if (error < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+		pm_runtime_put_noidle(bdi->dev);
+		return;
+	}
 
-	if (v != bdi->model) {
-		ret = -ENODEV;
-		goto out;
+	if      (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_SDP) == 1)
+		iinlim =  500000;
+	else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_CDP) == 1 ||
+		 extcon_get_state(bdi->extcon, EXTCON_CHG_USB_ACA) == 1)
+		iinlim = 1500000;
+	else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_DCP) == 1)
+		iinlim = 2000000;
+
+	if (iinlim) {
+		error = bq24190_set_field_val(bdi, BQ24190_REG_ISC,
+					      BQ24190_REG_ISC_IINLIM_MASK,
+					      BQ24190_REG_ISC_IINLIM_SHIFT,
+					      bq24190_isc_iinlim_values,
+					      ARRAY_SIZE(bq24190_isc_iinlim_values),
+					      iinlim);
+		if (error < 0)
+			dev_err(bdi->dev, "Can't set IINLIM: %d\n", error);
 	}
 
-	ret = bq24190_register_reset(bdi);
-	if (ret < 0)
-		goto out;
+	/* if no charger found and in USB host mode, set OTG 5V boost, else normal */
+	if (!iinlim && extcon_get_state(bdi->extcon, EXTCON_USB_HOST) == 1)
+		v = BQ24190_REG_POC_CHG_CONFIG_OTG;
+	else
+		v = BQ24190_REG_POC_CHG_CONFIG_CHARGE;
 
-	ret = bq24190_set_mode_host(bdi);
-	if (ret < 0)
-		goto out;
+	error = bq24190_write_mask(bdi, BQ24190_REG_POC,
+				   BQ24190_REG_POC_CHG_CONFIG_MASK,
+				   BQ24190_REG_POC_CHG_CONFIG_SHIFT,
+				   v);
+	if (error < 0)
+		dev_err(bdi->dev, "Can't set CHG_CONFIG: %d\n", error);
 
-	ret = bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
-out:
-	pm_runtime_put_sync(bdi->dev);
-	return ret;
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
 }
 
-#ifdef CONFIG_OF
-static int bq24190_setup_dt(struct bq24190_dev_info *bdi)
+static int bq24190_extcon_event(struct notifier_block *nb, unsigned long event,
+				void *param)
 {
-	bdi->irq = irq_of_parse_and_map(bdi->dev->of_node, 0);
-	if (bdi->irq <= 0)
-		return -1;
+	struct bq24190_dev_info *bdi =
+		container_of(nb, struct bq24190_dev_info, extcon_nb);
 
-	return 0;
-}
-#else
-static int bq24190_setup_dt(struct bq24190_dev_info *bdi)
-{
-	return -1;
+	/*
+	 * The Power-Good detection may take up to 220ms, sometimes
+	 * the external charger detection is quicker, and the bq24190 will
+	 * reset to iinlim based on its own charger detection (which is not
+	 * hooked up when using external charger detection) resulting in
+	 * a too low default 500mA iinlim. Delay applying the extcon value
+	 * for 300ms to avoid this.
+	 */
+	queue_delayed_work(system_wq, &bdi->extcon_work, msecs_to_jiffies(300));
+
+	return NOTIFY_OK;
 }
-#endif
 
-static int bq24190_setup_pdata(struct bq24190_dev_info *bdi,
-		struct bq24190_platform_data *pdata)
+static int bq24190_hw_init(struct bq24190_dev_info *bdi)
 {
+	u8 v;
 	int ret;
 
-	if (!gpio_is_valid(pdata->gpio_int))
-		return -1;
-
-	ret = gpio_request(pdata->gpio_int, dev_name(bdi->dev));
+	/* First check that the device really is what its supposed to be */
+	ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
+			BQ24190_REG_VPRS_PN_MASK,
+			BQ24190_REG_VPRS_PN_SHIFT,
+			&v);
 	if (ret < 0)
-		return -1;
+		return ret;
 
-	ret = gpio_direction_input(pdata->gpio_int);
-	if (ret < 0)
-		goto out;
+	if (v != BQ24190_REG_VPRS_PN_24190 &&
+	    v != BQ24190_REG_VPRS_PN_24192I) {
+		dev_err(bdi->dev, "Error unknown model: 0x%02x\n", v);
+		return -ENODEV;
+	}
 
-	bdi->irq = gpio_to_irq(pdata->gpio_int);
-	if (!bdi->irq)
-		goto out;
+	ret = bq24190_register_reset(bdi);
+	if (ret < 0)
+		return ret;
 
-	bdi->gpio_int = pdata->gpio_int;
-	return 0;
+	ret = bq24190_set_mode_host(bdi);
+	if (ret < 0)
+		return ret;
 
-out:
-	gpio_free(pdata->gpio_int);
-	return -1;
+	return bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
 }
 
 static int bq24190_probe(struct i2c_client *client,
@@ -1320,9 +1398,9 @@ static int bq24190_probe(struct i2c_client *client,
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct device *dev = &client->dev;
-	struct bq24190_platform_data *pdata = client->dev.platform_data;
 	struct power_supply_config charger_cfg = {}, battery_cfg = {};
 	struct bq24190_dev_info *bdi;
+	const char *name;
 	int ret;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
@@ -1338,7 +1416,6 @@ static int bq24190_probe(struct i2c_client *client,
 
 	bdi->client = client;
 	bdi->dev = dev;
-	bdi->model = id->driver_data;
 	strncpy(bdi->model_name, id->name, I2C_NAME_SIZE);
 	mutex_init(&bdi->f_reg_lock);
 	bdi->f_reg = 0;
@@ -1346,23 +1423,43 @@ static int bq24190_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, bdi);
 
-	if (dev->of_node)
-		ret = bq24190_setup_dt(bdi);
-	else
-		ret = bq24190_setup_pdata(bdi, pdata);
-
-	if (ret) {
+	if (!client->irq) {
 		dev_err(dev, "Can't get irq info\n");
 		return -EINVAL;
 	}
 
+	/*
+	 * Devicetree platforms should get extcon via phandle (not yet supported).
+	 * On ACPI platforms, extcon clients may invoke us with:
+	 * struct property_entry pe[] =
+	 *   { PROPERTY_ENTRY_STRING("extcon-name", client_name), ... };
+	 * struct i2c_board_info bi =
+	 *   { .type = "bq24190", .addr = 0x6b, .properties = pe, .irq = irq };
+	 * struct i2c_adapter ad = { ... };
+	 * i2c_add_adapter(&ad);
+	 * i2c_new_device(&ad, &bi);
+	 */
+	if (device_property_read_string(dev, "extcon-name", &name) == 0) {
+		bdi->extcon = extcon_get_extcon_dev(name);
+		if (!bdi->extcon)
+			return -EPROBE_DEFER;
+
+		dev_info(bdi->dev, "using extcon device %s\n", name);
+	}
+
 	pm_runtime_enable(dev);
-	pm_runtime_resume(dev);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 600);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get failed: %i\n", ret);
+		goto out_pmrt;
+	}
 
 	ret = bq24190_hw_init(bdi);
 	if (ret < 0) {
 		dev_err(dev, "Hardware init failed\n");
-		goto out1;
+		goto out_pmrt;
 	}
 
 	charger_cfg.drv_data = bdi;
@@ -1373,7 +1470,7 @@ static int bq24190_probe(struct i2c_client *client,
 	if (IS_ERR(bdi->charger)) {
 		dev_err(dev, "Can't register charger\n");
 		ret = PTR_ERR(bdi->charger);
-		goto out1;
+		goto out_pmrt;
 	}
 
 	battery_cfg.drv_data = bdi;
@@ -1382,87 +1479,160 @@ static int bq24190_probe(struct i2c_client *client,
 	if (IS_ERR(bdi->battery)) {
 		dev_err(dev, "Can't register battery\n");
 		ret = PTR_ERR(bdi->battery);
-		goto out2;
+		goto out_charger;
 	}
 
 	ret = bq24190_sysfs_create_group(bdi);
 	if (ret) {
 		dev_err(dev, "Can't create sysfs entries\n");
-		goto out3;
+		goto out_battery;
 	}
 
-	ret = devm_request_threaded_irq(dev, bdi->irq, NULL,
+	bdi->initialized = true;
+
+	ret = devm_request_threaded_irq(dev, client->irq, NULL,
 			bq24190_irq_handler_thread,
 			IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 			"bq24190-charger", bdi);
 	if (ret < 0) {
 		dev_err(dev, "Can't set up irq handler\n");
-		goto out4;
+		goto out_sysfs;
+	}
+
+	if (bdi->extcon) {
+		INIT_DELAYED_WORK(&bdi->extcon_work, bq24190_extcon_work);
+		bdi->extcon_nb.notifier_call = bq24190_extcon_event;
+		ret = devm_extcon_register_notifier_all(dev, bdi->extcon,
+							&bdi->extcon_nb);
+		if (ret) {
+			dev_err(dev, "Can't register extcon\n");
+			goto out_sysfs;
+		}
+
+		/* Sync initial cable state */
+		queue_delayed_work(system_wq, &bdi->extcon_work, 0);
 	}
 
+	enable_irq_wake(client->irq);
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
 	return 0;
 
-out4:
+out_sysfs:
 	bq24190_sysfs_remove_group(bdi);
 
-out3:
+out_battery:
 	power_supply_unregister(bdi->battery);
 
-out2:
+out_charger:
 	power_supply_unregister(bdi->charger);
 
-out1:
+out_pmrt:
+	pm_runtime_put_sync(dev);
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-	if (bdi->gpio_int)
-		gpio_free(bdi->gpio_int);
 	return ret;
 }
 
 static int bq24190_remove(struct i2c_client *client)
 {
 	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+	int error;
 
-	pm_runtime_get_sync(bdi->dev);
-	bq24190_register_reset(bdi);
-	pm_runtime_put_sync(bdi->dev);
+	error = pm_runtime_get_sync(bdi->dev);
+	if (error < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+		pm_runtime_put_noidle(bdi->dev);
+	}
 
+	bq24190_register_reset(bdi);
 	bq24190_sysfs_remove_group(bdi);
 	power_supply_unregister(bdi->battery);
 	power_supply_unregister(bdi->charger);
+	if (error >= 0)
+		pm_runtime_put_sync(bdi->dev);
+	pm_runtime_dont_use_autosuspend(bdi->dev);
 	pm_runtime_disable(bdi->dev);
 
-	if (bdi->gpio_int)
-		gpio_free(bdi->gpio_int);
+	return 0;
+}
+
+static __maybe_unused int bq24190_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+
+	if (!bdi->initialized)
+		return 0;
+
+	dev_dbg(bdi->dev, "%s\n", __func__);
+
+	return 0;
+}
+
+static __maybe_unused int bq24190_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+
+	if (!bdi->initialized)
+		return 0;
+
+	if (!bdi->irq_event) {
+		dev_dbg(bdi->dev, "checking events on possible wakeirq\n");
+		bq24190_check_status(bdi);
+	}
 
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int bq24190_pm_suspend(struct device *dev)
+static __maybe_unused int bq24190_pm_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+	int error;
+
+	error = pm_runtime_get_sync(bdi->dev);
+	if (error < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+		pm_runtime_put_noidle(bdi->dev);
+	}
 
-	pm_runtime_get_sync(bdi->dev);
 	bq24190_register_reset(bdi);
-	pm_runtime_put_sync(bdi->dev);
+
+	if (error >= 0) {
+		pm_runtime_mark_last_busy(bdi->dev);
+		pm_runtime_put_autosuspend(bdi->dev);
+	}
 
 	return 0;
 }
 
-static int bq24190_pm_resume(struct device *dev)
+static __maybe_unused int bq24190_pm_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
+	int error;
 
 	bdi->f_reg = 0;
 	bdi->ss_reg = BQ24190_REG_SS_VBUS_STAT_MASK; /* impossible state */
 
-	pm_runtime_get_sync(bdi->dev);
+	error = pm_runtime_get_sync(bdi->dev);
+	if (error < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error);
+		pm_runtime_put_noidle(bdi->dev);
+	}
+
 	bq24190_register_reset(bdi);
 	bq24190_set_mode_host(bdi);
 	bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg);
-	pm_runtime_put_sync(bdi->dev);
+
+	if (error >= 0) {
+		pm_runtime_mark_last_busy(bdi->dev);
+		pm_runtime_put_autosuspend(bdi->dev);
+	}
 
 	/* Things may have changed while suspended so alert upper layer */
 	power_supply_changed(bdi->charger);
@@ -1470,17 +1640,16 @@ static int bq24190_pm_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(bq24190_pm_ops, bq24190_pm_suspend, bq24190_pm_resume);
+static const struct dev_pm_ops bq24190_pm_ops = {
+	SET_RUNTIME_PM_OPS(bq24190_runtime_suspend, bq24190_runtime_resume,
+			   NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(bq24190_pm_suspend, bq24190_pm_resume)
+};
 
-/*
- * Only support the bq24190 right now.  The bq24192, bq24192i, and bq24193
- * are similar but not identical so the driver needs to be extended to
- * support them.
- */
 static const struct i2c_device_id bq24190_i2c_ids[] = {
-	{ "bq24190", BQ24190_REG_VPRS_PN_24190 },
+	{ "bq24190" },
+	{ "bq24192i" },
 	{ },
 };
 MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids);
diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index f993a55cde20..8e2c41ded171 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -723,7 +723,7 @@ static int bq25890_irq_probe(struct bq25890_device *bq)
 {
 	struct gpio_desc *irq;
 
-	irq = devm_gpiod_get_index(bq->dev, BQ25890_IRQ_PIN, 0, GPIOD_IN);
+	irq = devm_gpiod_get(bq->dev, BQ25890_IRQ_PIN, GPIOD_IN);
 	if (IS_ERR(irq)) {
 		dev_err(bq->dev, "Could not probe irq pin.\n");
 		return PTR_ERR(irq);
diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c
index e664ca7c0afd..adc3761831e1 100644
--- a/drivers/power/supply/charger-manager.c
+++ b/drivers/power/supply/charger-manager.c
@@ -1198,7 +1198,7 @@ static int charger_extcon_notifier(struct notifier_block *self,
 static int charger_extcon_init(struct charger_manager *cm,
 		struct charger_cable *cable)
 {
-	int ret = 0;
+	int ret;
 
 	/*
 	 * Charger manager use Extcon framework to identify
@@ -1232,7 +1232,7 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
 {
 	struct charger_desc *desc = cm->desc;
 	struct charger_regulator *charger;
-	int ret = 0;
+	int ret;
 	int i;
 	int j;
 
@@ -1255,15 +1255,14 @@ static int charger_manager_register_extcon(struct charger_manager *cm)
 			if (ret < 0) {
 				dev_err(cm->dev, "Cannot initialize charger(%s)\n",
 					charger->regulator_name);
-				goto err;
+				return ret;
 			}
 			cable->charger = charger;
 			cable->cm = cm;
 		}
 	}
 
-err:
-	return ret;
+	return 0;
 }
 
 /* help function of sysfs node to control charger(regulator) */
@@ -1372,7 +1371,7 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
 	int chargers_externally_control = 1;
 	char buf[11];
 	char *str;
-	int ret = 0;
+	int ret;
 	int i;
 
 	/* Create sysfs entry to control charger(regulator) */
@@ -1382,10 +1381,9 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
 		snprintf(buf, 10, "charger.%d", i);
 		str = devm_kzalloc(cm->dev,
 				sizeof(char) * (strlen(buf) + 1), GFP_KERNEL);
-		if (!str) {
-			ret = -ENOMEM;
-			goto err;
-		}
+		if (!str)
+			return -ENOMEM;
+
 		strcpy(str, buf);
 
 		charger->attrs[0] = &charger->attr_name.attr;
@@ -1426,19 +1424,16 @@ static int charger_manager_register_sysfs(struct charger_manager *cm)
 		if (ret < 0) {
 			dev_err(cm->dev, "Cannot create sysfs entry of %s regulator\n",
 				charger->regulator_name);
-			ret = -EINVAL;
-			goto err;
+			return ret;
 		}
 	}
 
 	if (chargers_externally_control) {
 		dev_err(cm->dev, "Cannot register regulator because charger-manager must need at least one charger for charging battery\n");
-		ret = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
-err:
-	return ret;
+	return 0;
 }
 
 static int cm_init_thermal_data(struct charger_manager *cm,
@@ -1626,7 +1621,7 @@ static int charger_manager_probe(struct platform_device *pdev)
 {
 	struct charger_desc *desc = cm_get_drv_data(pdev);
 	struct charger_manager *cm;
-	int ret = 0, i = 0;
+	int ret, i = 0;
 	int j = 0;
 	union power_supply_propval val;
 	struct power_supply *fuel_gauge;
@@ -1887,14 +1882,12 @@ MODULE_DEVICE_TABLE(platform, charger_manager_id);
 
 static int cm_suspend_noirq(struct device *dev)
 {
-	int ret = 0;
-
 	if (device_may_wakeup(dev)) {
 		device_set_wakeup_capable(dev, false);
-		ret = -EAGAIN;
+		return -EAGAIN;
 	}
 
-	return ret;
+	return 0;
 }
 
 static bool cm_need_to_awake(void)
diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c
new file mode 100644
index 000000000000..543a1bd21ab9
--- /dev/null
+++ b/drivers/power/supply/cpcap-charger.c
@@ -0,0 +1,681 @@
+/*
+ * Motorola CPCAP PMIC battery charger driver
+ *
+ * Copyright (C) 2017 Tony Lindgren <tony@atomide.com>
+ *
+ * Rewritten for Linux power framework with some parts based on
+ * on earlier driver found in the Motorola Linux kernel:
+ *
+ * Copyright (C) 2009-2010 Motorola, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/regmap.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/usb/phy_companion.h>
+#include <linux/phy/omap_usb.h>
+#include <linux/usb/otg.h>
+#include <linux/iio/consumer.h>
+#include <linux/mfd/motorola-cpcap.h>
+
+/* CPCAP_REG_CRM register bits */
+#define CPCAP_REG_CRM_UNUSED_641_15	BIT(15)	/* 641 = register number */
+#define CPCAP_REG_CRM_UNUSED_641_14	BIT(14)	/* 641 = register number */
+#define CPCAP_REG_CRM_CHRG_LED_EN	BIT(13)
+#define CPCAP_REG_CRM_RVRSMODE		BIT(12)
+#define CPCAP_REG_CRM_ICHRG_TR1		BIT(11)
+#define CPCAP_REG_CRM_ICHRG_TR0		BIT(10)
+#define CPCAP_REG_CRM_FET_OVRD		BIT(9)
+#define CPCAP_REG_CRM_FET_CTRL		BIT(8)
+#define CPCAP_REG_CRM_VCHRG3		BIT(7)
+#define CPCAP_REG_CRM_VCHRG2		BIT(6)
+#define CPCAP_REG_CRM_VCHRG1		BIT(5)
+#define CPCAP_REG_CRM_VCHRG0		BIT(4)
+#define CPCAP_REG_CRM_ICHRG3		BIT(3)
+#define CPCAP_REG_CRM_ICHRG2		BIT(2)
+#define CPCAP_REG_CRM_ICHRG1		BIT(1)
+#define CPCAP_REG_CRM_ICHRG0		BIT(0)
+
+/* CPCAP_REG_CRM trickle charge voltages */
+#define CPCAP_REG_CRM_TR(val)		(((val) & 0x3) << 10)
+#define CPCAP_REG_CRM_TR_0A00		CPCAP_REG_CRM_TR(0x0)
+#define CPCAP_REG_CRM_TR_0A24		CPCAP_REG_CRM_TR(0x1)
+#define CPCAP_REG_CRM_TR_0A48		CPCAP_REG_CRM_TR(0x2)
+#define CPCAP_REG_CRM_TR_0A72		CPCAP_REG_CRM_TR(0x4)
+
+/* CPCAP_REG_CRM charge voltages */
+#define CPCAP_REG_CRM_VCHRG(val)	(((val) & 0xf) << 4)
+#define CPCAP_REG_CRM_VCHRG_3V80	CPCAP_REG_CRM_VCHRG(0x0)
+#define CPCAP_REG_CRM_VCHRG_4V10	CPCAP_REG_CRM_VCHRG(0x1)
+#define CPCAP_REG_CRM_VCHRG_4V15	CPCAP_REG_CRM_VCHRG(0x2)
+#define CPCAP_REG_CRM_VCHRG_4V20	CPCAP_REG_CRM_VCHRG(0x3)
+#define CPCAP_REG_CRM_VCHRG_4V22	CPCAP_REG_CRM_VCHRG(0x4)
+#define CPCAP_REG_CRM_VCHRG_4V24	CPCAP_REG_CRM_VCHRG(0x5)
+#define CPCAP_REG_CRM_VCHRG_4V26	CPCAP_REG_CRM_VCHRG(0x6)
+#define CPCAP_REG_CRM_VCHRG_4V28	CPCAP_REG_CRM_VCHRG(0x7)
+#define CPCAP_REG_CRM_VCHRG_4V30	CPCAP_REG_CRM_VCHRG(0x8)
+#define CPCAP_REG_CRM_VCHRG_4V32	CPCAP_REG_CRM_VCHRG(0x9)
+#define CPCAP_REG_CRM_VCHRG_4V34	CPCAP_REG_CRM_VCHRG(0xa)
+#define CPCAP_REG_CRM_VCHRG_4V36	CPCAP_REG_CRM_VCHRG(0xb)
+#define CPCAP_REG_CRM_VCHRG_4V38	CPCAP_REG_CRM_VCHRG(0xc)
+#define CPCAP_REG_CRM_VCHRG_4V40	CPCAP_REG_CRM_VCHRG(0xd)
+#define CPCAP_REG_CRM_VCHRG_4V42	CPCAP_REG_CRM_VCHRG(0xe)
+#define CPCAP_REG_CRM_VCHRG_4V44	CPCAP_REG_CRM_VCHRG(0xf)
+
+/* CPCAP_REG_CRM charge currents */
+#define CPCAP_REG_CRM_ICHRG(val)	(((val) & 0xf) << 0)
+#define CPCAP_REG_CRM_ICHRG_0A000	CPCAP_REG_CRM_ICHRG(0x0)
+#define CPCAP_REG_CRM_ICHRG_0A070	CPCAP_REG_CRM_ICHRG(0x1)
+#define CPCAP_REG_CRM_ICHRG_0A176	CPCAP_REG_CRM_ICHRG(0x2)
+#define CPCAP_REG_CRM_ICHRG_0A264	CPCAP_REG_CRM_ICHRG(0x3)
+#define CPCAP_REG_CRM_ICHRG_0A352	CPCAP_REG_CRM_ICHRG(0x4)
+#define CPCAP_REG_CRM_ICHRG_0A440	CPCAP_REG_CRM_ICHRG(0x5)
+#define CPCAP_REG_CRM_ICHRG_0A528	CPCAP_REG_CRM_ICHRG(0x6)
+#define CPCAP_REG_CRM_ICHRG_0A616	CPCAP_REG_CRM_ICHRG(0x7)
+#define CPCAP_REG_CRM_ICHRG_0A704	CPCAP_REG_CRM_ICHRG(0x8)
+#define CPCAP_REG_CRM_ICHRG_0A792	CPCAP_REG_CRM_ICHRG(0x9)
+#define CPCAP_REG_CRM_ICHRG_0A880	CPCAP_REG_CRM_ICHRG(0xa)
+#define CPCAP_REG_CRM_ICHRG_0A968	CPCAP_REG_CRM_ICHRG(0xb)
+#define CPCAP_REG_CRM_ICHRG_1A056	CPCAP_REG_CRM_ICHRG(0xc)
+#define CPCAP_REG_CRM_ICHRG_1A144	CPCAP_REG_CRM_ICHRG(0xd)
+#define CPCAP_REG_CRM_ICHRG_1A584	CPCAP_REG_CRM_ICHRG(0xe)
+#define CPCAP_REG_CRM_ICHRG_NO_LIMIT	CPCAP_REG_CRM_ICHRG(0xf)
+
+enum {
+	CPCAP_CHARGER_IIO_BATTDET,
+	CPCAP_CHARGER_IIO_VOLTAGE,
+	CPCAP_CHARGER_IIO_VBUS,
+	CPCAP_CHARGER_IIO_CHRG_CURRENT,
+	CPCAP_CHARGER_IIO_BATT_CURRENT,
+	CPCAP_CHARGER_IIO_NR,
+};
+
+struct cpcap_charger_ddata {
+	struct device *dev;
+	struct regmap *reg;
+	struct list_head irq_list;
+	struct delayed_work detect_work;
+	struct delayed_work vbus_work;
+	struct gpio_desc *gpio[2];		/* gpio_reven0 & 1 */
+
+	struct iio_channel *channels[CPCAP_CHARGER_IIO_NR];
+
+	struct power_supply *usb;
+
+	struct phy_companion comparator;	/* For USB VBUS */
+	bool vbus_enabled;
+	atomic_t active;
+
+	int status;
+};
+
+struct cpcap_interrupt_desc {
+	int irq;
+	struct list_head node;
+	const char *name;
+};
+
+struct cpcap_charger_ints_state {
+	bool chrg_det;
+	bool rvrs_chrg;
+	bool vbusov;
+
+	bool chrg_se1b;
+	bool rvrs_mode;
+	bool chrgcurr1;
+	bool vbusvld;
+
+	bool battdetb;
+};
+
+static enum power_supply_property cpcap_charger_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+};
+
+static bool cpcap_charger_battery_found(struct cpcap_charger_ddata *ddata)
+{
+	struct iio_channel *channel;
+	int error, value;
+
+	channel = ddata->channels[CPCAP_CHARGER_IIO_BATTDET];
+	error = iio_read_channel_raw(channel, &value);
+	if (error < 0) {
+		dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+		return false;
+	}
+
+	return value == 1;
+}
+
+static int cpcap_charger_get_charge_voltage(struct cpcap_charger_ddata *ddata)
+{
+	struct iio_channel *channel;
+	int error, value = 0;
+
+	channel = ddata->channels[CPCAP_CHARGER_IIO_VOLTAGE];
+	error = iio_read_channel_processed(channel, &value);
+	if (error < 0) {
+		dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+		return 0;
+	}
+
+	return value;
+}
+
+static int cpcap_charger_get_charge_current(struct cpcap_charger_ddata *ddata)
+{
+	struct iio_channel *channel;
+	int error, value = 0;
+
+	channel = ddata->channels[CPCAP_CHARGER_IIO_CHRG_CURRENT];
+	error = iio_read_channel_processed(channel, &value);
+	if (error < 0) {
+		dev_warn(ddata->dev, "%s failed: %i\n", __func__, error);
+
+		return 0;
+	}
+
+	return value;
+}
+
+static int cpcap_charger_get_property(struct power_supply *psy,
+				      enum power_supply_property psp,
+				      union power_supply_propval *val)
+{
+	struct cpcap_charger_ddata *ddata = dev_get_drvdata(psy->dev.parent);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = ddata->status;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		if (ddata->status == POWER_SUPPLY_STATUS_CHARGING)
+			val->intval = cpcap_charger_get_charge_voltage(ddata) *
+				1000;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		if (ddata->status == POWER_SUPPLY_STATUS_CHARGING)
+			val->intval = cpcap_charger_get_charge_current(ddata) *
+				1000;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = ddata->status == POWER_SUPPLY_STATUS_CHARGING;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void cpcap_charger_set_cable_path(struct cpcap_charger_ddata *ddata,
+					 bool enabled)
+{
+	if (!ddata->gpio[0])
+		return;
+
+	gpiod_set_value(ddata->gpio[0], enabled);
+}
+
+static void cpcap_charger_set_inductive_path(struct cpcap_charger_ddata *ddata,
+					     bool enabled)
+{
+	if (!ddata->gpio[1])
+		return;
+
+	gpiod_set_value(ddata->gpio[1], enabled);
+}
+
+static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata,
+				   int max_voltage, int charge_current,
+				   int trickle_current)
+{
+	bool enable;
+	int error;
+
+	enable = max_voltage && (charge_current || trickle_current);
+	dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable);
+
+	if (!enable) {
+		error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+					   0x3fff,
+					   CPCAP_REG_CRM_FET_OVRD |
+					   CPCAP_REG_CRM_FET_CTRL);
+		if (error) {
+			ddata->status = POWER_SUPPLY_STATUS_UNKNOWN;
+			goto out_err;
+		}
+
+		ddata->status = POWER_SUPPLY_STATUS_DISCHARGING;
+
+		return 0;
+	}
+
+	error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, 0x3fff,
+				   CPCAP_REG_CRM_CHRG_LED_EN |
+				   trickle_current |
+				   CPCAP_REG_CRM_FET_OVRD |
+				   CPCAP_REG_CRM_FET_CTRL |
+				   max_voltage |
+				   charge_current);
+	if (error) {
+		ddata->status = POWER_SUPPLY_STATUS_UNKNOWN;
+		goto out_err;
+	}
+
+	ddata->status = POWER_SUPPLY_STATUS_CHARGING;
+
+	return 0;
+
+out_err:
+	dev_err(ddata->dev, "%s failed with %i\n", __func__, error);
+
+	return error;
+}
+
+static bool cpcap_charger_vbus_valid(struct cpcap_charger_ddata *ddata)
+{
+	int error, value = 0;
+	struct iio_channel *channel =
+		ddata->channels[CPCAP_CHARGER_IIO_VBUS];
+
+	error = iio_read_channel_processed(channel, &value);
+	if (error >= 0)
+		return value > 3900 ? true : false;
+
+	dev_err(ddata->dev, "error reading VBUS: %i\n", error);
+
+	return false;
+}
+
+/* VBUS control functions for the USB PHY companion */
+
+static void cpcap_charger_vbus_work(struct work_struct *work)
+{
+	struct cpcap_charger_ddata *ddata;
+	bool vbus = false;
+	int error;
+
+	ddata = container_of(work, struct cpcap_charger_ddata,
+			     vbus_work.work);
+
+	if (ddata->vbus_enabled) {
+		vbus = cpcap_charger_vbus_valid(ddata);
+		if (vbus) {
+			dev_info(ddata->dev, "VBUS already provided\n");
+
+			return;
+		}
+
+		cpcap_charger_set_cable_path(ddata, false);
+		cpcap_charger_set_inductive_path(ddata, false);
+
+		error = cpcap_charger_set_state(ddata, 0, 0, 0);
+		if (error)
+			goto out_err;
+
+		error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+					   CPCAP_REG_CRM_RVRSMODE,
+					   CPCAP_REG_CRM_RVRSMODE);
+		if (error)
+			goto out_err;
+	} else {
+		error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM,
+					   CPCAP_REG_CRM_RVRSMODE, 0);
+		if (error)
+			goto out_err;
+
+		cpcap_charger_set_cable_path(ddata, true);
+		cpcap_charger_set_inductive_path(ddata, true);
+	}
+
+	return;
+
+out_err:
+	dev_err(ddata->dev, "%s could not %s vbus: %i\n", __func__,
+		ddata->vbus_enabled ? "enable" : "disable", error);
+}
+
+static int cpcap_charger_set_vbus(struct phy_companion *comparator,
+				  bool enabled)
+{
+	struct cpcap_charger_ddata *ddata =
+		container_of(comparator, struct cpcap_charger_ddata,
+			     comparator);
+
+	ddata->vbus_enabled = enabled;
+	schedule_delayed_work(&ddata->vbus_work, 0);
+
+	return 0;
+}
+
+/* Charger interrupt handling functions */
+
+static int cpcap_charger_get_ints_state(struct cpcap_charger_ddata *ddata,
+					struct cpcap_charger_ints_state *s)
+{
+	int val, error;
+
+	error = regmap_read(ddata->reg, CPCAP_REG_INTS1, &val);
+	if (error)
+		return error;
+
+	s->chrg_det = val & BIT(13);
+	s->rvrs_chrg = val & BIT(12);
+	s->vbusov = val & BIT(11);
+
+	error = regmap_read(ddata->reg, CPCAP_REG_INTS2, &val);
+	if (error)
+		return error;
+
+	s->chrg_se1b = val & BIT(13);
+	s->rvrs_mode = val & BIT(6);
+	s->chrgcurr1 = val & BIT(4);
+	s->vbusvld = val & BIT(3);
+
+	error = regmap_read(ddata->reg, CPCAP_REG_INTS4, &val);
+	if (error)
+		return error;
+
+	s->battdetb = val & BIT(6);
+
+	return 0;
+}
+
+static void cpcap_usb_detect(struct work_struct *work)
+{
+	struct cpcap_charger_ddata *ddata;
+	struct cpcap_charger_ints_state s;
+	int error;
+
+	ddata = container_of(work, struct cpcap_charger_ddata,
+			     detect_work.work);
+
+	error = cpcap_charger_get_ints_state(ddata, &s);
+	if (error)
+		return;
+
+	if (cpcap_charger_vbus_valid(ddata) && s.chrgcurr1) {
+		int max_current;
+
+		if (cpcap_charger_battery_found(ddata))
+			max_current = CPCAP_REG_CRM_ICHRG_1A584;
+		else
+			max_current = CPCAP_REG_CRM_ICHRG_0A528;
+
+		error = cpcap_charger_set_state(ddata,
+						CPCAP_REG_CRM_VCHRG_4V20,
+						max_current,
+						CPCAP_REG_CRM_TR_0A72);
+		if (error)
+			goto out_err;
+	} else {
+		error = cpcap_charger_set_state(ddata, 0, 0, 0);
+		if (error)
+			goto out_err;
+	}
+
+	return;
+
+out_err:
+	dev_err(ddata->dev, "%s failed with %i\n", __func__, error);
+}
+
+static irqreturn_t cpcap_charger_irq_thread(int irq, void *data)
+{
+	struct cpcap_charger_ddata *ddata = data;
+
+	if (!atomic_read(&ddata->active))
+		return IRQ_NONE;
+
+	schedule_delayed_work(&ddata->detect_work, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int cpcap_usb_init_irq(struct platform_device *pdev,
+			      struct cpcap_charger_ddata *ddata,
+			      const char *name)
+{
+	struct cpcap_interrupt_desc *d;
+	int irq, error;
+
+	irq = platform_get_irq_byname(pdev, name);
+	if (!irq)
+		return -ENODEV;
+
+	error = devm_request_threaded_irq(ddata->dev, irq, NULL,
+					  cpcap_charger_irq_thread,
+					  IRQF_SHARED,
+					  name, ddata);
+	if (error) {
+		dev_err(ddata->dev, "could not get irq %s: %i\n",
+			name, error);
+
+		return error;
+	}
+
+	d = devm_kzalloc(ddata->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->name = name;
+	d->irq = irq;
+	list_add(&d->node, &ddata->irq_list);
+
+	return 0;
+}
+
+static const char * const cpcap_charger_irqs[] = {
+	/* REG_INT_0 */
+	"chrg_det", "rvrs_chrg",
+
+	/* REG_INT1 */
+	"chrg_se1b", "se0conn", "rvrs_mode", "chrgcurr1", "vbusvld",
+
+	/* REG_INT_3 */
+	"battdetb",
+};
+
+static int cpcap_usb_init_interrupts(struct platform_device *pdev,
+				     struct cpcap_charger_ddata *ddata)
+{
+	int i, error;
+
+	for (i = 0; i < ARRAY_SIZE(cpcap_charger_irqs); i++) {
+		error = cpcap_usb_init_irq(pdev, ddata, cpcap_charger_irqs[i]);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+static void cpcap_charger_init_optional_gpios(struct cpcap_charger_ddata *ddata)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		ddata->gpio[i] = devm_gpiod_get_index(ddata->dev, "mode",
+						      i, GPIOD_OUT_HIGH);
+		if (IS_ERR(ddata->gpio[i])) {
+			dev_info(ddata->dev, "no mode change GPIO%i: %li\n",
+				 i, PTR_ERR(ddata->gpio[i]));
+				 ddata->gpio[i] = NULL;
+		}
+	}
+}
+
+static int cpcap_charger_init_iio(struct cpcap_charger_ddata *ddata)
+{
+	const char * const names[CPCAP_CHARGER_IIO_NR] = {
+		"battdetb", "battp", "vbus", "chg_isense", "batti",
+	};
+	int error, i;
+
+	for (i = 0; i < CPCAP_CHARGER_IIO_NR; i++) {
+		ddata->channels[i] = devm_iio_channel_get(ddata->dev,
+							  names[i]);
+		if (IS_ERR(ddata->channels[i])) {
+			error = PTR_ERR(ddata->channels[i]);
+			goto out_err;
+		}
+
+		if (!ddata->channels[i]->indio_dev) {
+			error = -ENXIO;
+			goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	dev_err(ddata->dev, "could not initialize VBUS or ID IIO: %i\n",
+		error);
+
+	return error;
+}
+
+static const struct power_supply_desc cpcap_charger_usb_desc = {
+	.name		= "cpcap_usb",
+	.type		= POWER_SUPPLY_TYPE_USB,
+	.properties	= cpcap_charger_props,
+	.num_properties	= ARRAY_SIZE(cpcap_charger_props),
+	.get_property	= cpcap_charger_get_property,
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id cpcap_charger_id_table[] = {
+	{
+		.compatible = "motorola,mapphone-cpcap-charger",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpcap_charger_id_table);
+#endif
+
+static int cpcap_charger_probe(struct platform_device *pdev)
+{
+	struct cpcap_charger_ddata *ddata;
+	const struct of_device_id *of_id;
+	int error;
+
+	of_id = of_match_device(of_match_ptr(cpcap_charger_id_table),
+				&pdev->dev);
+	if (!of_id)
+		return -EINVAL;
+
+	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
+	if (!ddata)
+		return -ENOMEM;
+
+	ddata->dev = &pdev->dev;
+
+	ddata->reg = dev_get_regmap(ddata->dev->parent, NULL);
+	if (!ddata->reg)
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&ddata->irq_list);
+	INIT_DELAYED_WORK(&ddata->detect_work, cpcap_usb_detect);
+	INIT_DELAYED_WORK(&ddata->vbus_work, cpcap_charger_vbus_work);
+	platform_set_drvdata(pdev, ddata);
+
+	error = cpcap_charger_init_iio(ddata);
+	if (error)
+		return error;
+
+	atomic_set(&ddata->active, 1);
+
+	ddata->usb = devm_power_supply_register(ddata->dev,
+						&cpcap_charger_usb_desc,
+						NULL);
+	if (IS_ERR(ddata->usb)) {
+		error = PTR_ERR(ddata->usb);
+		dev_err(ddata->dev, "failed to register USB charger: %i\n",
+			error);
+
+		return error;
+	}
+
+	error = cpcap_usb_init_interrupts(pdev, ddata);
+	if (error)
+		return error;
+
+	ddata->comparator.set_vbus = cpcap_charger_set_vbus;
+	error = omap_usb2_set_comparator(&ddata->comparator);
+	if (error == -ENODEV) {
+		dev_info(ddata->dev, "charger needs phy, deferring probe\n");
+		return -EPROBE_DEFER;
+	}
+
+	cpcap_charger_init_optional_gpios(ddata);
+
+	schedule_delayed_work(&ddata->detect_work, 0);
+
+	return 0;
+}
+
+static int cpcap_charger_remove(struct platform_device *pdev)
+{
+	struct cpcap_charger_ddata *ddata = platform_get_drvdata(pdev);
+	int error;
+
+	atomic_set(&ddata->active, 0);
+	error = omap_usb2_set_comparator(NULL);
+	if (error)
+		dev_warn(ddata->dev, "could not clear USB comparator: %i\n",
+			 error);
+
+	error = cpcap_charger_set_state(ddata, 0, 0, 0);
+	if (error)
+		dev_warn(ddata->dev, "could not clear charger: %i\n",
+			 error);
+	cancel_delayed_work_sync(&ddata->vbus_work);
+	cancel_delayed_work_sync(&ddata->detect_work);
+
+	return 0;
+}
+
+static struct platform_driver cpcap_charger_driver = {
+	.probe = cpcap_charger_probe,
+	.driver	= {
+		.name	= "cpcap-charger",
+		.of_match_table = of_match_ptr(cpcap_charger_id_table),
+	},
+	.remove	= cpcap_charger_remove,
+};
+module_platform_driver(cpcap_charger_driver);
+
+MODULE_AUTHOR("Tony Lindgren <tony@atomide.com>");
+MODULE_DESCRIPTION("CPCAP Battery Charger Interface driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:cpcap-charger");
diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c
new file mode 100644
index 000000000000..7b993d669f7f
--- /dev/null
+++ b/drivers/power/supply/lego_ev3_battery.c
@@ -0,0 +1,228 @@
+/*
+ * Battery driver for LEGO MINDSTORMS EV3
+ *
+ * Copyright (C) 2017 David Lechner <david@lechnology.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iio/consumer.h>
+#include <linux/iio/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+
+struct lego_ev3_battery {
+	struct iio_channel *iio_v;
+	struct iio_channel *iio_i;
+	struct gpio_desc *rechargeable_gpio;
+	struct power_supply *psy;
+	int technology;
+	int v_max;
+	int v_min;
+};
+
+static int lego_ev3_battery_get_property(struct power_supply *psy,
+					 enum power_supply_property psp,
+					 union power_supply_propval *val)
+{
+	struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+	int val2;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = batt->technology;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		/* battery voltage is iio channel * 2 + Vce of transistor */
+		iio_read_channel_processed(batt->iio_v, &val->intval);
+		val->intval *= 2000;
+		val->intval += 200000;
+		/* plus adjust for shunt resistor drop */
+		iio_read_channel_processed(batt->iio_i, &val2);
+		val2 *= 1000;
+		val2 /= 15;
+		val->intval += val2;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = batt->v_max;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = batt->v_min;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		/* battery current is iio channel / 15 / 0.05 ohms */
+		iio_read_channel_processed(batt->iio_i, &val->intval);
+		val->intval *= 20000;
+		val->intval /= 15;
+		break;
+	case POWER_SUPPLY_PROP_SCOPE:
+		val->intval = POWER_SUPPLY_SCOPE_SYSTEM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int lego_ev3_battery_set_property(struct power_supply *psy,
+					 enum power_supply_property psp,
+					 const union power_supply_propval *val)
+{
+	struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		/*
+		 * Only allow changing technology from Unknown to NiMH. Li-ion
+		 * batteries are automatically detected and should not be
+		 * overridden. Rechargeable AA batteries, on the other hand,
+		 * cannot be automatically detected, and so must be manually
+		 * specified. This should only be set once during system init,
+		 * so there is no mechanism to go back to Unknown.
+		 */
+		if (batt->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN)
+			return -EINVAL;
+		switch (val->intval) {
+		case POWER_SUPPLY_TECHNOLOGY_NiMH:
+			batt->technology = POWER_SUPPLY_TECHNOLOGY_NiMH;
+			batt->v_max = 7800000;
+			batt->v_min = 5400000;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int lego_ev3_battery_property_is_writeable(struct power_supply *psy,
+						  enum power_supply_property psp)
+{
+	struct lego_ev3_battery *batt = power_supply_get_drvdata(psy);
+
+	return psp == POWER_SUPPLY_PROP_TECHNOLOGY &&
+		batt->technology == POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+}
+
+static enum power_supply_property lego_ev3_battery_props[] = {
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_SCOPE,
+};
+
+static const struct power_supply_desc lego_ev3_battery_desc = {
+	.name			= "lego-ev3-battery",
+	.type			= POWER_SUPPLY_TYPE_BATTERY,
+	.properties		= lego_ev3_battery_props,
+	.num_properties		= ARRAY_SIZE(lego_ev3_battery_props),
+	.get_property		= lego_ev3_battery_get_property,
+	.set_property		= lego_ev3_battery_set_property,
+	.property_is_writeable	= lego_ev3_battery_property_is_writeable,
+};
+
+static int lego_ev3_battery_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct lego_ev3_battery *batt;
+	struct power_supply_config psy_cfg = {};
+	int err;
+
+	batt = devm_kzalloc(dev, sizeof(*batt), GFP_KERNEL);
+	if (!batt)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, batt);
+
+	batt->iio_v = devm_iio_channel_get(dev, "voltage");
+	err = PTR_ERR_OR_ZERO(batt->iio_v);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get voltage iio channel\n");
+		return err;
+	}
+
+	batt->iio_i = devm_iio_channel_get(dev, "current");
+	err = PTR_ERR_OR_ZERO(batt->iio_i);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get current iio channel\n");
+		return err;
+	}
+
+	batt->rechargeable_gpio = devm_gpiod_get(dev, "rechargeable", GPIOD_IN);
+	err = PTR_ERR_OR_ZERO(batt->rechargeable_gpio);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get rechargeable gpio\n");
+		return err;
+	}
+
+	/*
+	 * The rechargeable battery indication switch cannot be changed without
+	 * removing the battery, so we only need to read it once.
+	 */
+	if (gpiod_get_value(batt->rechargeable_gpio)) {
+		/* 2-cell Li-ion, 7.4V nominal */
+		batt->technology = POWER_SUPPLY_TECHNOLOGY_LION;
+		batt->v_max = 84000000;
+		batt->v_min = 60000000;
+	} else {
+		/* 6x AA Alkaline, 9V nominal */
+		batt->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+		batt->v_max = 90000000;
+		batt->v_min = 48000000;
+	}
+
+	psy_cfg.of_node = pdev->dev.of_node;
+	psy_cfg.drv_data = batt;
+
+	batt->psy = devm_power_supply_register(dev, &lego_ev3_battery_desc,
+					       &psy_cfg);
+	err = PTR_ERR_OR_ZERO(batt->psy);
+	if (err) {
+		dev_err(dev, "failed to register power supply\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id of_lego_ev3_battery_match[] = {
+	{ .compatible = "lego,ev3-battery", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, of_lego_ev3_battery_match);
+
+static struct platform_driver lego_ev3_battery_driver = {
+	.driver	= {
+		.name		= "lego-ev3-battery",
+		.of_match_table = of_lego_ev3_battery_match,
+	},
+	.probe	= lego_ev3_battery_probe,
+};
+module_platform_driver(lego_ev3_battery_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Lechner <david@lechnology.com>");
+MODULE_DESCRIPTION("LEGO MINDSTORMS EV3 Battery Driver");
diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c
index 509e2b341bd6..677f7c40b25a 100644
--- a/drivers/power/supply/lp8788-charger.c
+++ b/drivers/power/supply/lp8788-charger.c
@@ -651,7 +651,7 @@ static ssize_t lp8788_show_eoc_time(struct device *dev,
 {
 	struct lp8788_charger *pchg = dev_get_drvdata(dev);
 	char *stime[] = { "400ms", "5min", "10min", "15min",
-			"20min", "25min", "30min" "No timeout" };
+			"20min", "25min", "30min", "No timeout" };
 	u8 val;
 
 	lp8788_read_byte(pchg->lp, LP8788_CHG_EOC, &val);
diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index 4adf2ba021ce..7efb908f4451 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -9,6 +9,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/swab.h>
@@ -61,7 +62,7 @@ struct ltc294x_info {
 	struct power_supply *supply;	/* Supply pointer */
 	struct power_supply_desc supply_desc;	/* Supply description */
 	struct delayed_work work;	/* Work scheduler */
-	int num_regs;	/* Number of registers (chip type) */
+	unsigned long num_regs;	/* Number of registers (chip type) */
 	int charge;	/* Last charge register content */
 	int r_sense;	/* mOhm */
 	int Qlsb;	/* nAh */
@@ -387,7 +388,7 @@ static int ltc294x_i2c_probe(struct i2c_client *client,
 
 	np = of_node_get(client->dev.of_node);
 
-	info->num_regs = id->driver_data;
+	info->num_regs = (unsigned long)of_device_get_match_data(&client->dev);
 	info->supply_desc.name = np->name;
 
 	/* r_sense can be negative, when sense+ is connected to the battery
@@ -497,9 +498,23 @@ static const struct i2c_device_id ltc294x_i2c_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ltc294x_i2c_id);
 
+static const struct of_device_id ltc294x_i2c_of_match[] = {
+	{
+		.compatible = "lltc,ltc2941",
+		.data = (void *)LTC2941_NUM_REGS
+	},
+	{
+		.compatible = "lltc,ltc2943",
+		.data = (void *)LTC2943_NUM_REGS
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ltc294x_i2c_of_match);
+
 static struct i2c_driver ltc294x_driver = {
 	.driver = {
 		.name	= "LTC2941",
+		.of_match_table = ltc294x_i2c_of_match,
 		.pm	= LTC294X_PM_OPS,
 	},
 	.probe		= ltc294x_i2c_probe,
diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
index e7c3649b31a0..33c40f79d23d 100644
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -277,9 +277,17 @@ static const struct i2c_device_id max17040_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max17040_id);
 
+static const struct of_device_id max17040_of_match[] = {
+	{ .compatible = "maxim,max17040" },
+	{ .compatible = "maxim,max77836-battery" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, max17040_of_match);
+
 static struct i2c_driver max17040_i2c_driver = {
 	.driver	= {
 		.name	= "max17040",
+		.of_match_table = max17040_of_match,
 		.pm	= MAX17040_PM_OPS,
 	},
 	.probe		= max17040_probe,
diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c
index 353765a5f44c..15947dbb511e 100644
--- a/drivers/power/supply/sbs-charger.c
+++ b/drivers/power/supply/sbs-charger.c
@@ -137,10 +137,7 @@ static enum power_supply_property sbs_properties[] = {
 
 static bool sbs_readable_reg(struct device *dev, unsigned int reg)
 {
-	if (reg < SBS_CHARGER_REG_SPEC_INFO)
-		return false;
-	else
-		return true;
+	return reg >= SBS_CHARGER_REG_SPEC_INFO;
 }
 
 static bool sbs_volatile_reg(struct device *dev, unsigned int reg)
diff --git a/drivers/power/supply/tps65217_charger.c b/drivers/power/supply/tps65217_charger.c
index 29b61e81b385..1f5234098aaf 100644
--- a/drivers/power/supply/tps65217_charger.c
+++ b/drivers/power/supply/tps65217_charger.c
@@ -58,8 +58,6 @@ static int tps65217_config_charger(struct tps65217_charger *charger)
 {
 	int ret;
 
-	dev_dbg(charger->dev, "%s\n", __func__);
-
 	/*
 	 * tps65217 rev. G, p. 31 (see p. 32 for NTC schematic)
 	 *
@@ -205,8 +203,6 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 	int ret;
 	int i;
 
-	dev_dbg(&pdev->dev, "%s\n", __func__);
-
 	charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL);
 	if (!charger)
 		return -ENOMEM;
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index bcd4dc304f27..990ff3d218bc 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -1117,7 +1117,7 @@ fail:
 	return ret;
 }
 
-static int __exit twl4030_bci_remove(struct platform_device *pdev)
+static int twl4030_bci_remove(struct platform_device *pdev)
 {
 	struct twl4030_bci *bci = platform_get_drvdata(pdev);
 
@@ -1148,11 +1148,11 @@ MODULE_DEVICE_TABLE(of, twl_bci_of_match);
 
 static struct platform_driver twl4030_bci_driver = {
 	.probe = twl4030_bci_probe,
+	.remove	= twl4030_bci_remove,
 	.driver	= {
 		.name	= "twl4030_bci",
 		.of_match_table = of_match_ptr(twl_bci_of_match),
 	},
-	.remove	= __exit_p(twl4030_bci_remove),
 };
 module_platform_driver(twl4030_bci_driver);
 
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 6ff61dad5e21..62fed9dc893e 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -183,11 +183,33 @@ static void jsfd_read(char *buf, unsigned long p, size_t togo) {
 	}
 }
 
-static void jsfd_do_request(struct request_queue *q)
+static int jsfd_queue;
+
+static struct request *jsfd_next_request(void)
+{
+	struct request_queue *q;
+	struct request *rq;
+	int old_pos = jsfd_queue;
+
+	do {
+		q = jsfd_disk[jsfd_queue]->queue;
+		if (++jsfd_queue == JSF_MAX)
+			jsfd_queue = 0;
+		if (q) {
+			rq = blk_fetch_request(q);
+			if (rq)
+				return rq;
+		}
+	} while (jsfd_queue != old_pos);
+
+	return NULL;
+}
+
+static void jsfd_request(void)
 {
 	struct request *req;
 
-	req = blk_fetch_request(q);
+	req = jsfd_next_request();
 	while (req) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
@@ -211,10 +233,15 @@ static void jsfd_do_request(struct request_queue *q)
 		err = 0;
 	end:
 		if (!__blk_end_request_cur(req, err))
-			req = blk_fetch_request(q);
+			req = jsfd_next_request();
 	}
 }
 
+static void jsfd_do_request(struct request_queue *q)
+{
+	jsfd_request();
+}
+
 /*
  * The memory devices use the full 32/64 bits of the offset, and so we cannot
  * check against negative addresses: they are ok. The return value is weird,
@@ -544,8 +571,6 @@ static int jsflash_init(void)
 	return 0;
 }
 
-static struct request_queue *jsf_queue;
-
 static int jsfd_init(void)
 {
 	static DEFINE_SPINLOCK(lock);
@@ -562,6 +587,11 @@ static int jsfd_init(void)
 		struct gendisk *disk = alloc_disk(1);
 		if (!disk)
 			goto out;
+		disk->queue = blk_init_queue(jsfd_do_request, &lock);
+		if (!disk->queue) {
+			put_disk(disk);
+			goto out;
+		}
 		jsfd_disk[i] = disk;
 	}
 
@@ -570,13 +600,6 @@ static int jsfd_init(void)
 		goto out;
 	}
 
-	jsf_queue = blk_init_queue(jsfd_do_request, &lock);
-	if (!jsf_queue) {
-		err = -ENOMEM;
-		unregister_blkdev(JSFD_MAJOR, "jsfd");
-		goto out;
-	}
-
 	for (i = 0; i < JSF_MAX; i++) {
 		struct gendisk *disk = jsfd_disk[i];
 		if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
@@ -589,7 +612,6 @@ static int jsfd_init(void)
 		disk->fops = &jsfd_fops;
 		set_capacity(disk, jdp->dsize >> 9);
 		disk->private_data = jdp;
-		disk->queue = jsf_queue;
 		add_disk(disk);
 		set_disk_ro(disk, 1);
 	}
@@ -619,6 +641,7 @@ static void __exit jsflash_cleanup_module(void)
 	for (i = 0; i < JSF_MAX; i++) {
 		if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
 		del_gendisk(jsfd_disk[i]);
+		blk_cleanup_queue(jsfd_disk[i]->queue);
 		put_disk(jsfd_disk[i]);
 	}
 	if (jsf0.busy)
@@ -628,7 +651,6 @@ static void __exit jsflash_cleanup_module(void)
 
 	misc_deregister(&jsf_dev);
 	unregister_blkdev(JSFD_MAJOR, "jsfd");
-	blk_cleanup_queue(jsf_queue);
 }
 
 module_init(jsflash_init_module);
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index fc2855565a51..93dbe58c47c8 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -166,6 +166,7 @@ scsi_mod-y			+= scsi_scan.o scsi_sysfs.o scsi_devinfo.o
 scsi_mod-$(CONFIG_SCSI_NETLINK)	+= scsi_netlink.o
 scsi_mod-$(CONFIG_SYSCTL)	+= scsi_sysctl.o
 scsi_mod-$(CONFIG_SCSI_PROC_FS)	+= scsi_proc.o
+scsi_mod-$(CONFIG_BLK_DEBUG_FS)	+= scsi_debugfs.o
 scsi_mod-y			+= scsi_trace.o scsi_logging.o
 scsi_mod-$(CONFIG_PM)		+= scsi_pm.o
 scsi_mod-$(CONFIG_SCSI_DH)	+= scsi_dh.o
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 257bbdd0f0b8..6d7840b096e6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -56,7 +56,7 @@ struct lpfc_sli2_slim;
 #define LPFC_MAX_SG_SEG_CNT	4096	/* sg element count per scsi cmnd */
 #define LPFC_MAX_SGL_SEG_CNT	512	/* SGL element count per scsi cmnd */
 #define LPFC_MAX_BPL_SEG_CNT	4096	/* BPL element count per scsi cmnd */
-#define LPFC_MIN_NVME_SEG_CNT	254
+#define LPFC_MAX_NVME_SEG_CNT	128	/* max SGL element cnt per NVME cmnd */
 
 #define LPFC_MAX_SGE_SIZE       0x80000000 /* Maximum data allowed in a SGE */
 #define LPFC_IOCB_LIST_CNT	2250	/* list of IOCBs for fast-path usage. */
@@ -474,6 +474,8 @@ struct lpfc_vport {
 	unsigned long rcv_buffer_time_stamp;
 	uint32_t vport_flag;
 #define STATIC_VPORT	1
+#define FAWWPN_SET	2
+#define FAWWPN_PARAM_CHG	4
 
 	uint16_t fdmi_num_disc;
 	uint32_t fdmi_hba_mask;
@@ -781,6 +783,7 @@ struct lpfc_hba {
 	uint32_t cfg_nvmet_fb_size;
 	uint32_t cfg_total_seg_cnt;
 	uint32_t cfg_sg_seg_cnt;
+	uint32_t cfg_nvme_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
 	uint64_t cfg_soft_wwnn;
 	uint64_t cfg_soft_wwpn;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 22819afbaef5..513fd07715cd 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2292,6 +2292,8 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr,
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
 	unsigned int cnt = count;
+	uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level;
+	u32 *fawwpn_key = (uint32_t *)&vport->fc_sparam.un.vendorVersion[0];
 
 	/*
 	 * We're doing a simple sanity check for soft_wwpn setting.
@@ -2305,6 +2307,12 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr,
 	 * here. The intent is to protect against the random user or
 	 * application that is just writing attributes.
 	 */
+	if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				 "0051 "LPFC_DRIVER_NAME" soft wwpn can not"
+				 " be enabled: fawwpn is enabled\n");
+		return -EINVAL;
+	}
 
 	/* count may include a LF at end of string */
 	if (buf[cnt-1] == '\n')
@@ -3335,7 +3343,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
  * percentage will go to NVME.
  */
 LPFC_ATTR_R(xri_split, 50, 10, 90,
-	     "Division of XRI resources between SCSI and NVME");
+	    "Division of XRI resources between SCSI and NVME");
 
 /*
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 18157d2840a3..a1686c2d863c 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2486,6 +2486,10 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi)
 				mbox, *rpi);
 	else {
 		*rpi = lpfc_sli4_alloc_rpi(phba);
+		if (*rpi == LPFC_RPI_ALLOC_ERROR) {
+			mempool_free(mbox, phba->mbox_mem_pool);
+			return -EBUSY;
+		}
 		status = lpfc_reg_rpi(phba, phba->pport->vpi,
 				phba->pport->fc_myDID,
 				(uint8_t *)&phba->pport->fc_sparam,
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 54e6ac42fbcd..944b32ca4931 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -24,6 +24,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
 
 struct fc_rport;
 struct fc_frame_header;
+struct lpfc_nvmet_rcv_ctx;
 void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli_read_link_ste(struct lpfc_hba *);
 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t);
@@ -99,7 +100,7 @@ void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *);
 
 int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
 			struct lpfc_iocbq *, struct lpfc_nodelist *);
-void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t);
+struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did);
 struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
 int  lpfc_nlp_put(struct lpfc_nodelist *);
 int  lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
@@ -245,6 +246,10 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
 void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
 struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba);
 void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab);
+void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
+			struct lpfc_dmabuf *mp);
+int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
+			       struct fc_frame_header *fc_hdr);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
 			uint16_t);
 int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
@@ -302,6 +307,8 @@ int lpfc_sli_check_eratt(struct lpfc_hba *);
 void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
 				    struct lpfc_sli_ring *, uint32_t);
 void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *);
+void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
+			     struct fc_frame_header *fc_hdr, bool aborted);
 void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *);
 int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t,
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index d3e9af983015..1487406aea77 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -537,19 +537,53 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
 	}
 }
 
+static void
+lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_nodelist *ndlp = NULL;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+
+	/*
+	 * To conserve rpi's, filter out addresses for other
+	 * vports on the same physical HBAs.
+	 */
+	if (Did != vport->fc_myDID &&
+	    (!lpfc_find_vport_by_did(phba, Did) ||
+	     vport->cfg_peer_port_login)) {
+		if (!phba->nvmet_support) {
+			/* FCPI/NVMEI path. Process Did */
+			lpfc_prep_node_fc4type(vport, Did, fc4_type);
+			return;
+		}
+		/* NVMET path.  NVMET only cares about NVMEI nodes. */
+		list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+			if (ndlp->nlp_type != NLP_NVME_INITIATOR ||
+			    ndlp->nlp_state != NLP_STE_UNMAPPED_NODE)
+				continue;
+			spin_lock_irq(shost->host_lock);
+			if (ndlp->nlp_DID == Did)
+				ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
+			else
+				ndlp->nlp_flag |= NLP_NVMET_RECOV;
+			spin_unlock_irq(shost->host_lock);
+		}
+	}
+}
+
 static int
 lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
 	    uint32_t Size)
 {
-	struct lpfc_hba  *phba = vport->phba;
 	struct lpfc_sli_ct_request *Response =
 		(struct lpfc_sli_ct_request *) mp->virt;
-	struct lpfc_nodelist *ndlp = NULL;
 	struct lpfc_dmabuf *mlast, *next_mp;
 	uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
 	uint32_t Did, CTentry;
 	int Cnt;
 	struct list_head head;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+	struct lpfc_nodelist *ndlp = NULL;
 
 	lpfc_set_disctmo(vport);
 	vport->num_disc_nodes = 0;
@@ -574,19 +608,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
 			/* Get next DID from NameServer List */
 			CTentry = *ctptr++;
 			Did = ((be32_to_cpu(CTentry)) & Mask_DID);
-
-			ndlp = NULL;
-
-			/*
-			 * Check for rscn processing or not
-			 * To conserve rpi's, filter out addresses for other
-			 * vports on the same physical HBAs.
-			 */
-			if ((Did != vport->fc_myDID) &&
-			    ((lpfc_find_vport_by_did(phba, Did) == NULL) ||
-			     vport->cfg_peer_port_login))
-				lpfc_prep_node_fc4type(vport, Did, fc4_type);
-
+			lpfc_ns_rsp_audit_did(vport, Did, fc4_type);
 			if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY)))
 				goto nsout1;
 
@@ -596,6 +618,22 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type,
 
 	}
 
+	/* All GID_FT entries processed.  If the driver is running in
+	 * in target mode, put impacted nodes into recovery and drop
+	 * the RPI to flush outstanding IO.
+	 */
+	if (vport->phba->nvmet_support) {
+		list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+			if (!(ndlp->nlp_flag & NLP_NVMET_RECOV))
+				continue;
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+						NLP_EVT_DEVICE_RECOVERY);
+			spin_lock_irq(shost->host_lock);
+			ndlp->nlp_flag &= ~NLP_NVMET_RECOV;
+			spin_lock_irq(shost->host_lock);
+		}
+	}
+
 nsout1:
 	list_del(&head);
 	return 0;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 913eed822cb8..fce549a91911 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -745,73 +745,102 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 {
 	struct lpfc_hba   *phba = vport->phba;
 	struct lpfc_nvmet_tgtport *tgtp;
+	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
 	int len = 0;
+	int cnt;
 
 	if (phba->nvmet_support) {
 		if (!phba->targetport)
 			return len;
 		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"\nNVME Targetport Statistics\n");
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"LS: Rcv %08x Drop %08x Abort %08x\n",
 				atomic_read(&tgtp->rcv_ls_req_in),
 				atomic_read(&tgtp->rcv_ls_req_drop),
 				atomic_read(&tgtp->xmt_ls_abort));
 		if (atomic_read(&tgtp->rcv_ls_req_in) !=
 		    atomic_read(&tgtp->rcv_ls_req_out)) {
-			len += snprintf(buf+len, size-len,
+			len += snprintf(buf + len, size - len,
 					"Rcv LS: in %08x != out %08x\n",
 					atomic_read(&tgtp->rcv_ls_req_in),
 					atomic_read(&tgtp->rcv_ls_req_out));
 		}
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n",
 				atomic_read(&tgtp->xmt_ls_rsp),
 				atomic_read(&tgtp->xmt_ls_drop),
 				atomic_read(&tgtp->xmt_ls_rsp_cmpl),
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"FCP: Rcv %08x Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
 		if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
 		    atomic_read(&tgtp->rcv_fcp_cmd_out)) {
-			len += snprintf(buf+len, size-len,
+			len += snprintf(buf + len, size - len,
 					"Rcv FCP: in %08x != out %08x\n",
 					atomic_read(&tgtp->rcv_fcp_cmd_in),
 					atomic_read(&tgtp->rcv_fcp_cmd_out));
 		}
 
-		len += snprintf(buf+len, size-len,
-				"FCP Rsp: read %08x readrsp %08x write %08x rsp %08x\n",
+		len += snprintf(buf + len, size - len,
+				"FCP Rsp: read %08x readrsp %08x "
+				"write %08x rsp %08x\n",
 				atomic_read(&tgtp->xmt_fcp_read),
 				atomic_read(&tgtp->xmt_fcp_read_rsp),
 				atomic_read(&tgtp->xmt_fcp_write),
 				atomic_read(&tgtp->xmt_fcp_rsp));
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"FCP Rsp: abort %08x drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_abort),
 				atomic_read(&tgtp->xmt_fcp_drop));
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"FCP Rsp Cmpl: %08x err %08x drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_rsp_cmpl),
 				atomic_read(&tgtp->xmt_fcp_rsp_error),
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
-		len += snprintf(buf+len, size-len,
+		len += snprintf(buf + len, size - len,
 				"ABORT: Xmt %08x Err %08x Cmpl %08x",
 				atomic_read(&tgtp->xmt_abort_rsp),
 				atomic_read(&tgtp->xmt_abort_rsp_error),
 				atomic_read(&tgtp->xmt_abort_cmpl));
 
-		len +=  snprintf(buf+len, size-len, "\n");
+		len +=  snprintf(buf + len, size - len, "\n");
+
+		cnt = 0;
+		spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		list_for_each_entry_safe(ctxp, next_ctxp,
+				&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				list) {
+			cnt++;
+		}
+		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		if (cnt) {
+			len += snprintf(buf + len, size - len,
+					"ABORT: %d ctx entries\n", cnt);
+			spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+			list_for_each_entry_safe(ctxp, next_ctxp,
+				    &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				    list) {
+				if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ))
+					break;
+				len += snprintf(buf + len, size - len,
+						"Entry: oxid %x state %x "
+						"flag %x\n",
+						ctxp->oxid, ctxp->state,
+						ctxp->flag);
+			}
+			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		}
 	} else {
 		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 			return len;
@@ -3128,8 +3157,6 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
 			datqp->hba_index);
-	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
-
 	return len;
 }
 
@@ -5700,10 +5727,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	struct lpfc_hba   *phba = vport->phba;
 
-	if (vport->disc_trc) {
-		kfree(vport->disc_trc);
-		vport->disc_trc = NULL;
-	}
+	kfree(vport->disc_trc);
+	vport->disc_trc = NULL;
 
 	debugfs_remove(vport->debug_disc_trc); /* discovery_trace */
 	vport->debug_disc_trc = NULL;
@@ -5770,10 +5795,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 		debugfs_remove(phba->debug_readRef); /* readRef */
 		phba->debug_readRef = NULL;
 
-		if (phba->slow_ring_trc) {
-			kfree(phba->slow_ring_trc);
-			phba->slow_ring_trc = NULL;
-		}
+		kfree(phba->slow_ring_trc);
+		phba->slow_ring_trc = NULL;
 
 		/* slow_ring_trace */
 		debugfs_remove(phba->debug_slow_ring_trc);
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index f4ff99d95db3..9d5a379f4b15 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -157,6 +157,7 @@ struct lpfc_node_rrq {
 #define NLP_LOGO_SND       0x00000100	/* sent LOGO request for this entry */
 #define NLP_RNID_SND       0x00000400	/* sent RNID request for this entry */
 #define NLP_ELS_SND_MASK   0x000007e0	/* sent ELS request for this entry */
+#define NLP_NVMET_RECOV    0x00001000   /* NVMET auditing node for recovery. */
 #define NLP_DEFER_RM       0x00010000	/* Remove this ndlp if no longer used */
 #define NLP_DELAY_TMO      0x00020000	/* delay timeout is running for node */
 #define NLP_NPR_2B_DISC    0x00040000	/* node is included in num_disc_nodes */
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index a5ca37e45fb6..67827e397431 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -603,9 +603,11 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport,
 		memcmp(&vport->fabric_portname, &sp->portName,
 			sizeof(struct lpfc_name)) ||
 		memcmp(&vport->fabric_nodename, &sp->nodeName,
-			sizeof(struct lpfc_name)))
+			sizeof(struct lpfc_name)) ||
+		(vport->vport_flag & FAWWPN_PARAM_CHG)) {
 		fabric_param_changed = 1;
-
+		vport->vport_flag &= ~FAWWPN_PARAM_CHG;
+	}
 	/*
 	 * Word 1 Bit 31 in common service parameter is overloaded.
 	 * Word 1 Bit 31 in FLOGI request is multiple NPort request
@@ -895,10 +897,9 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 			 * Cannot find existing Fabric ndlp, so allocate a
 			 * new one
 			 */
-			ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+			ndlp = lpfc_nlp_init(vport, PT2PT_RemoteID);
 			if (!ndlp)
 				goto fail;
-			lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID);
 		} else if (!NLP_CHK_NODE_ACT(ndlp)) {
 			ndlp = lpfc_enable_node(vport, ndlp,
 						NLP_STE_UNUSED_NODE);
@@ -1364,7 +1365,6 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba)
 int
 lpfc_initial_flogi(struct lpfc_vport *vport)
 {
-	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_nodelist *ndlp;
 
 	vport->port_state = LPFC_FLOGI;
@@ -1374,10 +1374,9 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
 	ndlp = lpfc_findnode_did(vport, Fabric_DID);
 	if (!ndlp) {
 		/* Cannot find existing Fabric ndlp, so allocate a new one */
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, Fabric_DID);
 		if (!ndlp)
 			return 0;
-		lpfc_nlp_init(vport, ndlp, Fabric_DID);
 		/* Set the node type */
 		ndlp->nlp_type |= NLP_FABRIC;
 		/* Put ndlp onto node list */
@@ -1418,17 +1417,15 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
 int
 lpfc_initial_fdisc(struct lpfc_vport *vport)
 {
-	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_nodelist *ndlp;
 
 	/* First look for the Fabric ndlp */
 	ndlp = lpfc_findnode_did(vport, Fabric_DID);
 	if (!ndlp) {
 		/* Cannot find existing Fabric ndlp, so allocate a new one */
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, Fabric_DID);
 		if (!ndlp)
 			return 0;
-		lpfc_nlp_init(vport, ndlp, Fabric_DID);
 		/* Put ndlp onto node list */
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -1564,14 +1561,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
 					     phba->active_rrq_pool);
 			return ndlp;
 		}
-		new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+		new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID);
 		if (!new_ndlp) {
 			if (active_rrqs_xri_bitmap)
 				mempool_free(active_rrqs_xri_bitmap,
 					     phba->active_rrq_pool);
 			return ndlp;
 		}
-		lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID);
 	} else if (!NLP_CHK_NODE_ACT(new_ndlp)) {
 		rc = memcmp(&ndlp->nlp_portname, name,
 			    sizeof(struct lpfc_name));
@@ -2845,10 +2841,9 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 
 	ndlp = lpfc_findnode_did(vport, nportid);
 	if (!ndlp) {
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, nportid);
 		if (!ndlp)
 			return 1;
-		lpfc_nlp_init(vport, ndlp, nportid);
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
 		ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
@@ -2938,10 +2933,9 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 
 	ndlp = lpfc_findnode_did(vport, nportid);
 	if (!ndlp) {
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, nportid);
 		if (!ndlp)
 			return 1;
-		lpfc_nlp_init(vport, ndlp, nportid);
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
 		ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
@@ -4403,7 +4397,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
 	memset(pcmd, 0, cmdsize);
 
-	*((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK));
+	*((uint32_t *)(pcmd)) = elsrspcmd;
 	pcmd += sizeof(uint32_t);
 
 	/* For PRLI, remainder of payload is PRLI parameter page */
@@ -5867,8 +5861,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport)
 		    (ndlp->nlp_state == NLP_STE_UNUSED_NODE) ||
 		    !lpfc_rscn_payload_check(vport, ndlp->nlp_DID))
 			continue;
+
+		/* NVME Target mode does not do RSCN Recovery. */
 		if (vport->phba->nvmet_support)
 			continue;
+
 		lpfc_disc_state_machine(vport, ndlp, NULL,
 					NLP_EVT_DEVICE_RECOVERY);
 		lpfc_cancel_retry_delay_tmo(vport, ndlp);
@@ -6133,7 +6130,6 @@ int
 lpfc_els_handle_rscn(struct lpfc_vport *vport)
 {
 	struct lpfc_nodelist *ndlp;
-	struct lpfc_hba *phba = vport->phba;
 
 	/* Ignore RSCN if the port is being torn down. */
 	if (vport->load_flag & FC_UNLOADING) {
@@ -6157,22 +6153,16 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
 	ndlp = lpfc_findnode_did(vport, NameServer_DID);
 	if (ndlp && NLP_CHK_NODE_ACT(ndlp)
 	    && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
-		/* Good ndlp, issue CT Request to NameServer */
+		/* Good ndlp, issue CT Request to NameServer.  Need to
+		 * know how many gidfts were issued.  If none, then just
+		 * flush the RSCN.  Otherwise, the outstanding requests
+		 * need to complete.
+		 */
 		vport->gidft_inp = 0;
-		if (lpfc_issue_gidft(vport) == 0)
-			/* Wait for NameServer query cmpl before we can
-			 * continue
-			 */
+		if (lpfc_issue_gidft(vport) > 0)
 			return 1;
 	} else {
-		/* If login to NameServer does not exist, issue one */
-		/* Good status, issue PLOGI to NameServer */
-		ndlp = lpfc_findnode_did(vport, NameServer_DID);
-		if (ndlp && NLP_CHK_NODE_ACT(ndlp))
-			/* Wait for NameServer login cmpl before we can
-			   continue */
-			return 1;
-
+		/* Nameserver login in question.  Revalidate. */
 		if (ndlp) {
 			ndlp = lpfc_enable_node(vport, ndlp,
 						NLP_STE_PLOGI_ISSUE);
@@ -6182,12 +6172,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
 			}
 			ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
 		} else {
-			ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+			ndlp = lpfc_nlp_init(vport, NameServer_DID);
 			if (!ndlp) {
 				lpfc_els_flush_rscn(vport);
 				return 0;
 			}
-			lpfc_nlp_init(vport, ndlp, NameServer_DID);
 			ndlp->nlp_prev_state = ndlp->nlp_state;
 			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
 		}
@@ -7746,11 +7735,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	ndlp = lpfc_findnode_did(vport, did);
 	if (!ndlp) {
 		/* Cannot find existing Fabric ndlp, so allocate a new one */
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, did);
 		if (!ndlp)
 			goto dropit;
-
-		lpfc_nlp_init(vport, ndlp, did);
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
 		newnode = 1;
 		if ((did & Fabric_DID_MASK) == Fabric_DID_MASK)
@@ -8193,7 +8180,6 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 static void
 lpfc_start_fdmi(struct lpfc_vport *vport)
 {
-	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_nodelist *ndlp;
 
 	/* If this is the first time, allocate an ndlp and initialize
@@ -8202,9 +8188,8 @@ lpfc_start_fdmi(struct lpfc_vport *vport)
 	 */
 	ndlp = lpfc_findnode_did(vport, FDMI_DID);
 	if (!ndlp) {
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, FDMI_DID);
 		if (ndlp) {
-			lpfc_nlp_init(vport, ndlp, FDMI_DID);
 			ndlp->nlp_type |= NLP_FABRIC;
 		} else {
 			return;
@@ -8257,7 +8242,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 
 	ndlp = lpfc_findnode_did(vport, NameServer_DID);
 	if (!ndlp) {
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, NameServer_DID);
 		if (!ndlp) {
 			if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
 				lpfc_disc_start(vport);
@@ -8268,7 +8253,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 					 "0251 NameServer login: no memory\n");
 			return;
 		}
-		lpfc_nlp_init(vport, ndlp, NameServer_DID);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
 		ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
 		if (!ndlp) {
@@ -8771,7 +8755,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	pcmd += sizeof(uint32_t); /* Node Name */
 	pcmd += sizeof(uint32_t); /* Node Name */
 	memcpy(pcmd, &vport->fc_nodename, 8);
-
+	memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion));
 	lpfc_set_disctmo(vport);
 
 	phba->fc_stat.elsXmitFDISC++;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 180b072beef6..0482c5580331 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -3002,6 +3002,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
 	struct lpfc_vport  *vport = pmb->vport;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 	struct serv_parm *sp = &vport->fc_sparam;
 	uint32_t ed_tov;
 
@@ -3031,6 +3032,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	}
 
 	lpfc_update_vport_wwn(vport);
+	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
 	if (vport->port_type == LPFC_PHYSICAL_PORT) {
 		memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn));
 		memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn));
@@ -3309,6 +3311,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_sli_ring *pring;
 	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+	uint8_t attn_type;
 
 	/* Unblock ELS traffic */
 	pring = lpfc_phba_elsring(phba);
@@ -3325,6 +3328,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	}
 
 	la = (struct lpfc_mbx_read_top *) &pmb->u.mb.un.varReadTop;
+	attn_type = bf_get(lpfc_mbx_read_top_att_type, la);
 
 	memcpy(&phba->alpa_map[0], mp->virt, 128);
 
@@ -3337,7 +3341,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
 	if (phba->fc_eventTag <= la->eventTag) {
 		phba->fc_stat.LinkMultiEvent++;
-		if (bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP)
+		if (attn_type == LPFC_ATT_LINK_UP)
 			if (phba->fc_eventTag != 0)
 				lpfc_linkdown(phba);
 	}
@@ -3353,7 +3357,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	}
 
 	phba->link_events++;
-	if ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) &&
+	if ((attn_type == LPFC_ATT_LINK_UP) &&
 	    !(phba->sli.sli_flag & LPFC_MENLO_MAINT)) {
 		phba->fc_stat.LinkUp++;
 		if (phba->link_flag & LS_LOOPBACK_MODE) {
@@ -3379,8 +3383,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 					phba->wait_4_mlo_maint_flg);
 		}
 		lpfc_mbx_process_link_up(phba, la);
-	} else if (bf_get(lpfc_mbx_read_top_att_type, la) ==
-		   LPFC_ATT_LINK_DOWN) {
+	} else if (attn_type == LPFC_ATT_LINK_DOWN ||
+		   attn_type == LPFC_ATT_UNEXP_WWPN) {
 		phba->fc_stat.LinkDown++;
 		if (phba->link_flag & LS_LOOPBACK_MODE)
 			lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
@@ -3389,6 +3393,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 				"Data: x%x x%x x%x\n",
 				la->eventTag, phba->fc_eventTag,
 				phba->pport->port_state, vport->fc_flag);
+		else if (attn_type == LPFC_ATT_UNEXP_WWPN)
+			lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
+				"1313 Link Down UNEXP WWPN Event x%x received "
+				"Data: x%x x%x x%x x%x x%x\n",
+				la->eventTag, phba->fc_eventTag,
+				phba->pport->port_state, vport->fc_flag,
+				bf_get(lpfc_mbx_read_top_mm, la),
+				bf_get(lpfc_mbx_read_top_fa, la));
 		else
 			lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
 				"1305 Link Down Event x%x received "
@@ -3399,8 +3411,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 				bf_get(lpfc_mbx_read_top_fa, la));
 		lpfc_mbx_issue_link_down(phba);
 	}
-	if ((phba->sli.sli_flag & LPFC_MENLO_MAINT) &&
-	    ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP))) {
+	if (phba->sli.sli_flag & LPFC_MENLO_MAINT &&
+	    attn_type == LPFC_ATT_LINK_UP) {
 		if (phba->link_state != LPFC_LINK_DOWN) {
 			phba->fc_stat.LinkDown++;
 			lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
@@ -4136,7 +4148,6 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		       int old_state, int new_state)
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-	struct lpfc_hba *phba = vport->phba;
 
 	if (new_state == NLP_STE_UNMAPPED_NODE) {
 		ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
@@ -4155,14 +4166,14 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 			lpfc_unregister_remote_port(ndlp);
 		}
 
-		/* Notify the NVME transport of this rport's loss */
-		if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
-		     (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) &&
-		    (vport->phba->nvmet_support == 0) &&
-		    ((ndlp->nlp_fc4_type & NLP_FC4_NVME) ||
-		    (ndlp->nlp_DID == Fabric_DID))) {
+		/* Notify the NVME transport of this rport's loss on the
+		 * Initiator.  For NVME Target, should upcall transport
+		 * in the else clause when API available.
+		 */
+		if (ndlp->nlp_fc4_type & NLP_FC4_NVME) {
 			vport->phba->nport_event_cnt++;
-			lpfc_nvme_unregister_port(vport, ndlp);
+			if (vport->phba->nvmet_support == 0)
+				lpfc_nvme_unregister_port(vport, ndlp);
 		}
 	}
 
@@ -4368,10 +4379,17 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	uint32_t did;
 	unsigned long flags;
 	unsigned long *active_rrqs_xri_bitmap = NULL;
+	int rpi = LPFC_RPI_ALLOC_ERROR;
 
 	if (!ndlp)
 		return NULL;
 
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rpi = lpfc_sli4_alloc_rpi(vport->phba);
+		if (rpi == LPFC_RPI_ALLOC_ERROR)
+			return NULL;
+	}
+
 	spin_lock_irqsave(&phba->ndlp_lock, flags);
 	/* The ndlp should not be in memory free mode */
 	if (NLP_CHK_FREE_REQ(ndlp)) {
@@ -4381,7 +4399,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 				"usgmap:x%x refcnt:%d\n",
 				(void *)ndlp, ndlp->nlp_usg_map,
 				kref_read(&ndlp->kref));
-		return NULL;
+		goto free_rpi;
 	}
 	/* The ndlp should not already be in active mode */
 	if (NLP_CHK_NODE_ACT(ndlp)) {
@@ -4391,7 +4409,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 				"usgmap:x%x refcnt:%d\n",
 				(void *)ndlp, ndlp->nlp_usg_map,
 				kref_read(&ndlp->kref));
-		return NULL;
+		goto free_rpi;
 	}
 
 	/* Keep the original DID */
@@ -4409,7 +4427,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
 	spin_unlock_irqrestore(&phba->ndlp_lock, flags);
 	if (vport->phba->sli_rev == LPFC_SLI_REV4) {
-		ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba);
+		ndlp->nlp_rpi = rpi;
 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
 				 "0008 rpi:%x DID:%x flg:%x refcnt:%d "
 				 "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
@@ -4426,6 +4444,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		"node enable:       did:x%x",
 		ndlp->nlp_DID, 0, 0);
 	return ndlp;
+
+free_rpi:
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		lpfc_sli4_free_rpi(vport->phba, rpi);
+	return NULL;
 }
 
 void
@@ -5104,65 +5127,82 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
 
 	ndlp = lpfc_findnode_did(vport, did);
 	if (!ndlp) {
+		if (vport->phba->nvmet_support)
+			return NULL;
 		if ((vport->fc_flag & FC_RSCN_MODE) != 0 &&
 		    lpfc_rscn_payload_check(vport, did) == 0)
 			return NULL;
-		ndlp = (struct lpfc_nodelist *)
-		     mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, did);
 		if (!ndlp)
 			return NULL;
-		lpfc_nlp_init(vport, ndlp, did);
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
-		if (vport->phba->nvmet_support)
-			return ndlp;
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
 		spin_unlock_irq(shost->host_lock);
 		return ndlp;
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
+		if (vport->phba->nvmet_support)
+			return NULL;
 		ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE);
 		if (!ndlp)
 			return NULL;
-		if (vport->phba->nvmet_support)
-			return ndlp;
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
 		spin_unlock_irq(shost->host_lock);
 		return ndlp;
 	}
 
+	/* The NVME Target does not want to actively manage an rport.
+	 * The goal is to allow the target to reset its state and clear
+	 * pending IO in preparation for the initiator to recover.
+	 */
 	if ((vport->fc_flag & FC_RSCN_MODE) &&
 	    !(vport->fc_flag & FC_NDISC_ACTIVE)) {
 		if (lpfc_rscn_payload_check(vport, did)) {
-			/* If we've already received a PLOGI from this NPort
-			 * we don't need to try to discover it again.
-			 */
-			if (ndlp->nlp_flag & NLP_RCV_PLOGI)
-				return NULL;
 
 			/* Since this node is marked for discovery,
 			 * delay timeout is not needed.
 			 */
 			lpfc_cancel_retry_delay_tmo(vport, ndlp);
+
+			/* NVME Target mode waits until rport is known to be
+			 * impacted by the RSCN before it transitions.  No
+			 * active management - just go to NPR provided the
+			 * node had a valid login.
+			 */
 			if (vport->phba->nvmet_support)
 				return ndlp;
+
+			/* If we've already received a PLOGI from this NPort
+			 * we don't need to try to discover it again.
+			 */
+			if (ndlp->nlp_flag & NLP_RCV_PLOGI)
+				return NULL;
+
 			spin_lock_irq(shost->host_lock);
 			ndlp->nlp_flag |= NLP_NPR_2B_DISC;
 			spin_unlock_irq(shost->host_lock);
 		} else
 			ndlp = NULL;
 	} else {
-		/* If we've already received a PLOGI from this NPort,
-		 * or we are already in the process of discovery on it,
-		 * we don't need to try to discover it again.
+		/* If the initiator received a PLOGI from this NPort or if the
+		 * initiator is already in the process of discovery on it,
+		 * there's no need to try to discover it again.
 		 */
 		if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE ||
 		    ndlp->nlp_state == NLP_STE_PLOGI_ISSUE ||
-		    ndlp->nlp_flag & NLP_RCV_PLOGI)
+		    (!vport->phba->nvmet_support &&
+		     ndlp->nlp_flag & NLP_RCV_PLOGI))
 			return NULL;
-		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+
 		if (vport->phba->nvmet_support)
 			return ndlp;
+
+		/* Moving to NPR state clears unsolicited flags and
+		 * allows for rediscovery
+		 */
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
 		spin_unlock_irq(shost->host_lock);
@@ -5887,16 +5927,31 @@ lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi)
 	return NULL;
 }
 
-void
-lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
-	      uint32_t did)
+struct lpfc_nodelist *
+lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did)
 {
+	struct lpfc_nodelist *ndlp;
+	int rpi = LPFC_RPI_ALLOC_ERROR;
+
+	if (vport->phba->sli_rev == LPFC_SLI_REV4) {
+		rpi = lpfc_sli4_alloc_rpi(vport->phba);
+		if (rpi == LPFC_RPI_ALLOC_ERROR)
+			return NULL;
+	}
+
+	ndlp = mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+	if (!ndlp) {
+		if (vport->phba->sli_rev == LPFC_SLI_REV4)
+			lpfc_sli4_free_rpi(vport->phba, rpi);
+		return NULL;
+	}
+
 	memset(ndlp, 0, sizeof (struct lpfc_nodelist));
 
 	lpfc_initialize_node(vport, ndlp, did);
 	INIT_LIST_HEAD(&ndlp->nlp_listp);
 	if (vport->phba->sli_rev == LPFC_SLI_REV4) {
-		ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba);
+		ndlp->nlp_rpi = rpi;
 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
 				 "0007 rpi:%x DID:%x flg:%x refcnt:%d "
 				 "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID,
@@ -5918,7 +5973,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		"node init:       did:x%x",
 		ndlp->nlp_DID, 0, 0);
 
-	return;
+	return ndlp;
 }
 
 /* This routine releases all resources associated with a specifc NPort's ndlp
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 15ca21484150..26a5647e057e 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -509,6 +509,8 @@ struct class_parms {
 	uint8_t word3Reserved2;	/* Fc Word 3, bit  0: 7 */
 };
 
+#define FAPWWN_KEY_VENDOR	0x42524344 /*valid vendor version fawwpn key*/
+
 struct serv_parm {	/* Structure is in Big Endian format */
 	struct csp cmn;
 	struct lpfc_name portName;
@@ -2885,6 +2887,7 @@ struct lpfc_mbx_read_top {
 #define LPFC_ATT_RESERVED    0x00	/* Reserved - attType */
 #define LPFC_ATT_LINK_UP     0x01	/* Link is up */
 #define LPFC_ATT_LINK_DOWN   0x02	/* Link is down */
+#define LPFC_ATT_UNEXP_WWPN  0x06	/* Link is down Unexpected WWWPN */
 	uint32_t word3;
 #define lpfc_mbx_read_top_alpa_granted_SHIFT	24
 #define lpfc_mbx_read_top_alpa_granted_MASK	0x000000FF
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 15277705cb6b..1d12f2be36bc 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -2720,6 +2720,9 @@ struct lpfc_mbx_request_features {
 #define lpfc_mbx_rq_ftr_rq_ifip_SHIFT		7
 #define lpfc_mbx_rq_ftr_rq_ifip_MASK		0x00000001
 #define lpfc_mbx_rq_ftr_rq_ifip_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_iaar_SHIFT		9
+#define lpfc_mbx_rq_ftr_rq_iaar_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_iaar_WORD		word2
 #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT		11
 #define lpfc_mbx_rq_ftr_rq_perfh_MASK		0x00000001
 #define lpfc_mbx_rq_ftr_rq_perfh_WORD		word2
@@ -3853,6 +3856,7 @@ struct lpfc_acqe_fc_la {
 #define LPFC_FC_LA_TYPE_NO_HARD_ALPA	0x3
 #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN	0x4
 #define LPFC_FC_LA_TYPE_MDS_LOOPBACK	0x5
+#define LPFC_FC_LA_TYPE_UNEXP_WWPN	0x6
 #define lpfc_acqe_fc_la_port_type_SHIFT		6
 #define lpfc_acqe_fc_la_port_type_MASK		0x00000003
 #define lpfc_acqe_fc_la_port_type_WORD		word0
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 6cc561b04211..90ae354a9c45 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -42,6 +42,10 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/fc/fc_fs.h>
+
+#include <linux/nvme-fc-driver.h>
 
 #include "lpfc_hw4.h"
 #include "lpfc_hw.h"
@@ -52,6 +56,7 @@
 #include "lpfc.h"
 #include "lpfc_scsi.h"
 #include "lpfc_nvme.h"
+#include "lpfc_nvmet.h"
 #include "lpfc_logmsg.h"
 #include "lpfc_crtn.h"
 #include "lpfc_vport.h"
@@ -335,6 +340,9 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 void
 lpfc_update_vport_wwn(struct lpfc_vport *vport)
 {
+	uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level;
+	u32 *fawwpn_key = (u32 *)&vport->fc_sparam.un.vendorVersion[0];
+
 	/* If the soft name exists then update it using the service params */
 	if (vport->phba->cfg_soft_wwnn)
 		u64_to_wwn(vport->phba->cfg_soft_wwnn,
@@ -354,9 +362,25 @@ lpfc_update_vport_wwn(struct lpfc_vport *vport)
 		memcpy(&vport->fc_sparam.nodeName, &vport->fc_nodename,
 			sizeof(struct lpfc_name));
 
-	if (vport->fc_portname.u.wwn[0] == 0 || vport->phba->cfg_soft_wwpn)
+	/*
+	 * If the port name has changed, then set the Param changes flag
+	 * to unreg the login
+	 */
+	if (vport->fc_portname.u.wwn[0] != 0 &&
+		memcmp(&vport->fc_portname, &vport->fc_sparam.portName,
+			sizeof(struct lpfc_name)))
+		vport->vport_flag |= FAWWPN_PARAM_CHG;
+
+	if (vport->fc_portname.u.wwn[0] == 0 ||
+	    vport->phba->cfg_soft_wwpn ||
+	    (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) ||
+	    vport->vport_flag & FAWWPN_SET) {
 		memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
 			sizeof(struct lpfc_name));
+		vport->vport_flag &= ~FAWWPN_SET;
+		if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR)
+			vport->vport_flag |= FAWWPN_SET;
+	}
 	else
 		memcpy(&vport->fc_sparam.portName, &vport->fc_portname,
 			sizeof(struct lpfc_name));
@@ -1003,8 +1027,10 @@ static int
 lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 {
 	struct lpfc_scsi_buf *psb, *psb_next;
+	struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
 	LIST_HEAD(aborts);
 	LIST_HEAD(nvme_aborts);
+	LIST_HEAD(nvmet_aborts);
 	unsigned long iflag = 0;
 	struct lpfc_sglq *sglq_entry = NULL;
 
@@ -1027,16 +1053,10 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 	list_for_each_entry(sglq_entry,
 		&phba->sli4_hba.lpfc_abts_els_sgl_list, list)
 		sglq_entry->state = SGL_FREED;
-	list_for_each_entry(sglq_entry,
-		&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list)
-		sglq_entry->state = SGL_FREED;
 
 	list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list,
 			&phba->sli4_hba.lpfc_els_sgl_list);
 
-	if (phba->sli4_hba.nvme_wq)
-		list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list,
-				 &phba->sli4_hba.lpfc_nvmet_sgl_list);
 
 	spin_unlock(&phba->sli4_hba.sgl_list_lock);
 	/* abts_scsi_buf_list_lock required because worker thread uses this
@@ -1053,6 +1073,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 		spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list,
 				 &nvme_aborts);
+		list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				 &nvmet_aborts);
 		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 	}
 
@@ -1066,13 +1088,20 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 	list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
 	spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
 
-	list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
-		psb->pCmd = NULL;
-		psb->status = IOSTAT_SUCCESS;
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
+			psb->pCmd = NULL;
+			psb->status = IOSTAT_SUCCESS;
+		}
+		spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
+		list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
+		spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
+
+		list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
+			ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
+			lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+		}
 	}
-	spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
-	list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
-	spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
 
 	lpfc_sli4_free_sp_events(phba);
 	return 0;
@@ -2874,34 +2903,38 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
 {
 	struct lpfc_nodelist  *ndlp, *next_ndlp;
 	struct lpfc_vport **vports;
-	int i;
+	int i, rpi;
+	unsigned long flags;
 
 	if (phba->sli_rev != LPFC_SLI_REV4)
 		return;
 
 	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL) {
-		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-			if (vports[i]->load_flag & FC_UNLOADING)
-				continue;
+	if (vports == NULL)
+		return;
 
-			list_for_each_entry_safe(ndlp, next_ndlp,
-						 &vports[i]->fc_nodes,
-						 nlp_listp) {
-				if (NLP_CHK_NODE_ACT(ndlp)) {
-					ndlp->nlp_rpi =
-						lpfc_sli4_alloc_rpi(phba);
-					lpfc_printf_vlog(ndlp->vport, KERN_INFO,
-							 LOG_NODE,
-							 "0009 rpi:%x DID:%x "
-							 "flg:%x map:%x %p\n",
-							 ndlp->nlp_rpi,
-							 ndlp->nlp_DID,
-							 ndlp->nlp_flag,
-							 ndlp->nlp_usg_map,
-							 ndlp);
-				}
+	for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+		if (vports[i]->load_flag & FC_UNLOADING)
+			continue;
+
+		list_for_each_entry_safe(ndlp, next_ndlp,
+					 &vports[i]->fc_nodes,
+					 nlp_listp) {
+			if (!NLP_CHK_NODE_ACT(ndlp))
+				continue;
+			rpi = lpfc_sli4_alloc_rpi(phba);
+			if (rpi == LPFC_RPI_ALLOC_ERROR) {
+				spin_lock_irqsave(&phba->ndlp_lock, flags);
+				NLP_CLR_NODE_ACT(ndlp);
+				spin_unlock_irqrestore(&phba->ndlp_lock, flags);
+				continue;
 			}
+			ndlp->nlp_rpi = rpi;
+			lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+					 "0009 rpi:%x DID:%x "
+					 "flg:%x map:%x %p\n", ndlp->nlp_rpi,
+					 ndlp->nlp_DID, ndlp->nlp_flag,
+					 ndlp->nlp_usg_map, ndlp);
 		}
 	}
 	lpfc_destroy_vport_work_array(phba, vports);
@@ -3508,6 +3541,12 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba)
 	spin_unlock(&phba->scsi_buf_list_put_lock);
 	spin_unlock_irq(&phba->scsi_buf_list_get_lock);
 
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"6060 Current allocated SCSI xri-sgl count:%d, "
+			"maximum  SCSI xri count:%d (split:%d)\n",
+			phba->sli4_hba.scsi_xri_cnt,
+			phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split);
+
 	if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
 		/* max scsi xri shrinked below the allocated scsi buffers */
 		scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt -
@@ -4508,9 +4547,15 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
 		/* Parse and translate link attention fields */
 		la = (struct lpfc_mbx_read_top *)&pmb->u.mb.un.varReadTop;
 		la->eventTag = acqe_fc->event_tag;
-		bf_set(lpfc_mbx_read_top_att_type, la,
-		       LPFC_FC_LA_TYPE_LINK_DOWN);
 
+		if (phba->sli4_hba.link_state.status ==
+		    LPFC_FC_LA_TYPE_UNEXP_WWPN) {
+			bf_set(lpfc_mbx_read_top_att_type, la,
+			       LPFC_FC_LA_TYPE_UNEXP_WWPN);
+		} else {
+			bf_set(lpfc_mbx_read_top_att_type, la,
+			       LPFC_FC_LA_TYPE_LINK_DOWN);
+		}
 		/* Invoke the mailbox command callback function */
 		lpfc_mbx_cmpl_read_topology(phba, pmb);
 
@@ -4716,10 +4761,9 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport)
 	ndlp = lpfc_findnode_did(vport, Fabric_DID);
 	if (!ndlp) {
 		/* Cannot find existing Fabric ndlp, so allocate a new one */
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, Fabric_DID);
 		if (!ndlp)
 			return 0;
-		lpfc_nlp_init(vport, ndlp, Fabric_DID);
 		/* Set the node type */
 		ndlp->nlp_type |= NLP_FABRIC;
 		/* Put ndlp onto node list */
@@ -5778,6 +5822,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		/* Initialize the Abort nvme buffer list used by driver */
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 		/* Fast-path XRI aborted CQ Event work queue list */
 		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
 	}
@@ -5809,6 +5854,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_vfi_blk_list);
 	INIT_LIST_HEAD(&phba->lpfc_vpi_blk_list);
 
+	/* Initialize mboxq lists. If the early init routines fail
+	 * these lists need to be correctly initialized.
+	 */
+	INIT_LIST_HEAD(&phba->sli.mboxq);
+	INIT_LIST_HEAD(&phba->sli.mboxq_cmpl);
+
 	/* initialize optic_state to 0xFF */
 	phba->sli4_hba.lnk_info.optic_state = 0xff;
 
@@ -5874,6 +5925,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 					"READ_NV, mbxStatus x%x\n",
 					bf_get(lpfc_mqe_command, &mboxq->u.mqe),
 					bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+			mempool_free(mboxq, phba->mbox_mem_pool);
 			rc = -EIO;
 			goto out_free_bsmbx;
 		}
@@ -6398,7 +6450,7 @@ lpfc_init_sgl_list(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list);
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list);
 	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list);
-	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
 
 	/* els xri-sgl book keeping */
 	phba->sli4_hba.els_xri_cnt = 0;
@@ -7799,7 +7851,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
 
 	/* Create Fast Path FCP WQs */
 	wqesize = (phba->fcp_embed_io) ?
-				LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
+		LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
 	qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount);
 	if (!qdesc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -7830,7 +7882,7 @@ int
 lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
 	struct lpfc_queue *qdesc;
-	int idx, io_channel, max;
+	int idx, io_channel;
 
 	/*
 	 * Create HBA Record arrays.
@@ -7991,15 +8043,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 		if (lpfc_alloc_nvme_wq_cq(phba, idx))
 			goto out_error;
 
-	/* allocate MRQ CQs */
-	max = phba->cfg_nvme_io_channel;
-	if (max < phba->cfg_nvmet_mrq)
-		max = phba->cfg_nvmet_mrq;
-
-	for (idx = 0; idx < max; idx++)
-		if (lpfc_alloc_nvme_wq_cq(phba, idx))
-			goto out_error;
-
 	if (phba->nvmet_support) {
 		for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
 			qdesc = lpfc_sli4_queue_alloc(phba,
@@ -8221,11 +8264,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 
 	/* Release FCP cqs */
 	lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq,
-					phba->cfg_fcp_io_channel);
+				 phba->cfg_fcp_io_channel);
 
 	/* Release FCP wqs */
 	lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq,
-					phba->cfg_fcp_io_channel);
+				 phba->cfg_fcp_io_channel);
 
 	/* Release FCP CQ mapping array */
 	lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map);
@@ -8571,15 +8614,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0528 %s not allocated\n",
 				phba->sli4_hba.mbx_cq ?
-						"Mailbox WQ" : "Mailbox CQ");
+				"Mailbox WQ" : "Mailbox CQ");
 		rc = -ENOMEM;
 		goto out_destroy;
 	}
 
 	rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0],
-					phba->sli4_hba.mbx_cq,
-					phba->sli4_hba.mbx_wq,
-					NULL, 0, LPFC_MBOX);
+			       phba->sli4_hba.mbx_cq,
+			       phba->sli4_hba.mbx_wq,
+			       NULL, 0, LPFC_MBOX);
 	if (rc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 			"0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n",
@@ -9934,17 +9977,19 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 {
 	int wait_time = 0;
 	int nvme_xri_cmpl = 1;
+	int nvmet_xri_cmpl = 1;
 	int fcp_xri_cmpl = 1;
 	int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
-	int nvmet_xri_cmpl =
-			list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
 
 	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
 		fcp_xri_cmpl =
 			list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 		nvme_xri_cmpl =
 			list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+		nvmet_xri_cmpl =
+			list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+	}
 
 	while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl ||
 	       !nvmet_xri_cmpl) {
@@ -9970,9 +10015,12 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 			msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1);
 			wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1;
 		}
-		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
 			nvme_xri_cmpl = list_empty(
 				&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+			nvmet_xri_cmpl = list_empty(
+				&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+		}
 
 		if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)
 			fcp_xri_cmpl = list_empty(
@@ -9981,8 +10029,6 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba)
 		els_xri_cmpl =
 			list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list);
 
-		nvmet_xri_cmpl =
-			list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list);
 	}
 }
 
@@ -10048,9 +10094,14 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
 	/* Stop kthread signal shall trigger work_done one more time */
 	kthread_stop(phba->worker_thread);
 
+	/* Unset the queues shared with the hardware then release all
+	 * allocated resources.
+	 */
+	lpfc_sli4_queue_unset(phba);
+	lpfc_sli4_queue_destroy(phba);
+
 	/* Reset SLI4 HBA FCoE function */
 	lpfc_pci_function_reset(phba);
-	lpfc_sli4_queue_destroy(phba);
 
 	/* Stop the SLI4 device port */
 	phba->pport->work_port_events = 0;
@@ -10306,6 +10357,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 	}
 
 	/* Initialize and populate the iocb list per host */
+
 	error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -11051,7 +11103,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	struct Scsi_Host  *shost = NULL;
-	int error;
+	int error, cnt;
 	uint32_t cfg_mode, intr_mode;
 
 	/* Allocate memory for HBA structure */
@@ -11085,12 +11137,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_unset_pci_mem_s4;
 	}
 
-	/* Initialize and populate the iocb list per host */
+	cnt = phba->cfg_iocb_cnt * 1024;
+	if (phba->nvmet_support)
+		cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq;
 
+	/* Initialize and populate the iocb list per host */
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"2821 initialize iocb list %d.\n",
-			phba->cfg_iocb_cnt*1024);
-	error = lpfc_init_iocb_list(phba, phba->cfg_iocb_cnt*1024);
+			"2821 initialize iocb list %d total %d\n",
+			phba->cfg_iocb_cnt, cnt);
+	error = lpfc_init_iocb_list(phba, cnt);
 
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -11177,7 +11232,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	if ((phba->nvmet_support == 0) &&
 	    (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
 		/* Create NVME binding with nvme_fc_transport. This
-		 * ensures the vport is initialized.
+		 * ensures the vport is initialized.  If the localport
+		 * create fails, it should not unload the driver to
+		 * support field issues.
 		 */
 		error = lpfc_nvme_create_localport(vport);
 		if (error) {
@@ -11185,7 +11242,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 					"6004 NVME registration failed, "
 					"error x%x\n",
 					error);
-			goto out_disable_intr;
 		}
 	}
 
@@ -11984,6 +12040,7 @@ int
 lpfc_fof_queue_create(struct lpfc_hba *phba)
 {
 	struct lpfc_queue *qdesc;
+	uint32_t wqesize;
 
 	/* Create FOF EQ */
 	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
@@ -12004,8 +12061,11 @@ lpfc_fof_queue_create(struct lpfc_hba *phba)
 		phba->sli4_hba.oas_cq = qdesc;
 
 		/* Create OAS WQ */
-		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+		wqesize = (phba->fcp_embed_io) ?
+				LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize;
+		qdesc = lpfc_sli4_queue_alloc(phba, wqesize,
 					      phba->sli4_hba.wq_ecount);
+
 		if (!qdesc)
 			goto out_error;
 
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index a928f5187fa4..ce25a18367b5 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -2083,9 +2083,12 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq)
 	if (phba->max_vpi && phba->cfg_enable_npiv)
 		bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1);
 
-	if (phba->nvmet_support)
+	if (phba->nvmet_support) {
 		bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1);
-
+		/* iaab/iaar NOT set for now */
+		 bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0);
+		 bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0);
+	}
 	return;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 061626bdf701..8777c2d5f50d 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -361,8 +361,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	case  NLP_STE_PRLI_ISSUE:
 	case  NLP_STE_UNMAPPED_NODE:
 	case  NLP_STE_MAPPED_NODE:
-		/* lpfc_plogi_confirm_nport skips fabric did, handle it here */
-		if (!(ndlp->nlp_type & NLP_FABRIC)) {
+		/* For initiators, lpfc_plogi_confirm_nport skips fabric did.
+		 * For target mode, execute implicit logo.
+		 * Fabric nodes go into NPR.
+		 */
+		if (!(ndlp->nlp_type & NLP_FABRIC) &&
+		    !(phba->nvmet_support)) {
 			lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb,
 					 ndlp, NULL);
 			return 1;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 0024de1c6c1f..8008c8205fb6 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -401,6 +401,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nodelist *ndlp;
 	struct ulp_bde64 *bpl;
 	struct lpfc_dmabuf *bmp;
+	uint16_t ntype, nstate;
 
 	/* there are two dma buf in the request, actually there is one and
 	 * the second one is just the start address + cmd size.
@@ -417,11 +418,26 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
 	vport = lport->vport;
 
 	ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
-	if (!ndlp) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-				 "6043 Could not find node for DID %x\n",
+	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
+				 "6051 DID x%06x not an active rport.\n",
 				 pnvme_rport->port_id);
-		return 1;
+		return -ENODEV;
+	}
+
+	/* The remote node has to be a mapped nvme target or an
+	 * unmapped nvme initiator or it's an error.
+	 */
+	ntype = ndlp->nlp_type;
+	nstate = ndlp->nlp_state;
+	if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
+	    (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
+				 "6088 DID x%06x not ready for "
+				 "IO. State x%x, Type x%x\n",
+				 pnvme_rport->port_id,
+				 ndlp->nlp_state, ndlp->nlp_type);
+		return -ENODEV;
 	}
 	bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
 	if (!bmp) {
@@ -456,7 +472,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
 
 	/* Expand print to include key fields. */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
-			 "6051 ENTER.  lport %p, rport %p lsreq%p rqstlen:%d "
+			 "6149 ENTER.  lport %p, rport %p lsreq%p rqstlen:%d "
 			 "rsplen:%d %pad %pad\n",
 			 pnvme_lport, pnvme_rport,
 			 pnvme_lsreq, pnvme_lsreq->rqstlen,
@@ -745,6 +761,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	struct nvme_fc_cmd_iu *cp;
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nodelist *ndlp;
+	struct lpfc_nvme_fcpreq_priv *freqpriv;
 	unsigned long flags;
 	uint32_t code;
 	uint16_t cid, sqhd, data;
@@ -772,9 +789,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	ndlp = rport->ndlp;
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
-				 "6061 rport %p, ndlp %p, DID x%06x ndlp "
-				 "not ready.\n",
-				 rport, ndlp, rport->remoteport->port_id);
+				 "6061 rport %p,  DID x%06x node not ready.\n",
+				 rport, rport->remoteport->port_id);
 
 		ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id);
 		if (!ndlp) {
@@ -853,15 +869,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 				break;
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
 					 "6081 NVME Completion Protocol Error: "
-					 "status x%x result x%x placed x%x\n",
+					 "xri %x status x%x result x%x "
+					 "placed x%x\n",
+					 lpfc_ncmd->cur_iocbq.sli4_xritag,
 					 lpfc_ncmd->status, lpfc_ncmd->result,
 					 wcqe->total_data_placed);
 			break;
 		default:
 out_err:
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
-					 "6072 NVME Completion Error: "
+					 "6072 NVME Completion Error: xri %x "
 					 "status x%x result x%x placed x%x\n",
+					 lpfc_ncmd->cur_iocbq.sli4_xritag,
 					 lpfc_ncmd->status, lpfc_ncmd->result,
 					 wcqe->total_data_placed);
 			nCmd->transferred_length = 0;
@@ -900,6 +919,8 @@ out_err:
 			phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++;
 	}
 #endif
+	freqpriv = nCmd->private;
+	freqpriv->nvme_buf = NULL;
 	nCmd->done(nCmd);
 
 	spin_lock_irqsave(&phba->hbalock, flags);
@@ -1099,12 +1120,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
 
 		first_data_sgl = sgl;
 		lpfc_ncmd->seg_cnt = nCmd->sg_cnt;
-		if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+		if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 					"6058 Too many sg segments from "
 					"NVME Transport.  Max %d, "
 					"nvmeIO sg_cnt %d\n",
-					phba->cfg_sg_seg_cnt,
+					phba->cfg_nvme_seg_cnt,
 					lpfc_ncmd->seg_cnt);
 			lpfc_ncmd->seg_cnt = 0;
 			return 1;
@@ -1196,6 +1217,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_buf *lpfc_ncmd;
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nvme_qhandle *lpfc_queue_info;
+	struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t start = 0;
 #endif
@@ -1274,7 +1296,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	 * Do not let the IO hang out forever.  There is no midlayer issuing
 	 * an abort so inform the FW of the maximum IO pending time.
 	 */
-	pnvme_fcreq->private = (void *)lpfc_ncmd;
+	freqpriv->nvme_buf = lpfc_ncmd;
 	lpfc_ncmd->nvmeCmd = pnvme_fcreq;
 	lpfc_ncmd->nrport = rport;
 	lpfc_ncmd->ndlp = ndlp;
@@ -1404,6 +1426,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_buf *lpfc_nbuf;
 	struct lpfc_iocbq *abts_buf;
 	struct lpfc_iocbq *nvmereq_wqe;
+	struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
 	union lpfc_wqe *abts_wqe;
 	unsigned long flags;
 	int ret_val;
@@ -1414,7 +1437,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	phba = vport->phba;
 
 	/* Announce entry to new IO submit field. */
-	lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
 			 "6002 Abort Request to rport DID x%06x "
 			 "for nvme_fc_req %p\n",
 			 pnvme_rport->port_id,
@@ -1444,7 +1467,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	/* The remote node has to be ready to send an abort. */
 	if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) &&
 	    !(ndlp->nlp_type & NLP_NVME_TARGET)) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6048 rport %p, DID x%06x not ready for "
 				 "IO. State x%x, Type x%x\n",
 				 rport, pnvme_rport->port_id,
@@ -1459,27 +1482,28 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	/* driver queued commands are in process of being flushed */
 	if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6139 Driver in reset cleanup - flushing "
 				 "NVME Req now.  hba_flag x%x\n",
 				 phba->hba_flag);
 		return;
 	}
 
-	lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private;
+	lpfc_nbuf = freqpriv->nvme_buf;
 	if (!lpfc_nbuf) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6140 NVME IO req has no matching lpfc nvme "
 				 "io buffer.  Skipping abort req.\n");
 		return;
 	} else if (!lpfc_nbuf->nvmeCmd) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6141 lpfc NVME IO req has no nvme_fcreq "
 				 "io buffer.  Skipping abort req.\n");
 		return;
 	}
+	nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
 
 	/*
 	 * The lpfc_nbuf and the mapped nvme_fcreq in the driver's
@@ -1490,23 +1514,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	 */
 	if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6143 NVME req mismatch: "
 				 "lpfc_nbuf %p nvmeCmd %p, "
-				 "pnvme_fcreq %p.  Skipping Abort\n",
+				 "pnvme_fcreq %p.  Skipping Abort xri x%x\n",
 				 lpfc_nbuf, lpfc_nbuf->nvmeCmd,
-				 pnvme_fcreq);
+				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
 		return;
 	}
 
 	/* Don't abort IOs no longer on the pending queue. */
-	nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
 	if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6142 NVME IO req %p not queued - skipping "
-				 "abort req\n",
-				 pnvme_fcreq);
+				 "abort req xri x%x\n",
+				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
 		return;
 	}
 
@@ -1517,21 +1540,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	/* Outstanding abort is in progress */
 	if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6144 Outstanding NVME I/O Abort Request "
 				 "still pending on nvme_fcreq %p, "
-				 "lpfc_ncmd %p\n",
-				 pnvme_fcreq, lpfc_nbuf);
+				 "lpfc_ncmd %p xri x%x\n",
+				 pnvme_fcreq, lpfc_nbuf,
+				 nvmereq_wqe->sli4_xritag);
 		return;
 	}
 
 	abts_buf = __lpfc_sli_get_iocbq(phba);
 	if (!abts_buf) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6136 No available abort wqes. Skipping "
-				 "Abts req for nvme_fcreq %p.\n",
-				 pnvme_fcreq);
+				 "Abts req for nvme_fcreq %p xri x%x\n",
+				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
 		return;
 	}
 
@@ -1580,7 +1604,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (ret_val == IOCB_ERROR) {
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
 				 "6137 Failed abts issue_wqe with status x%x "
 				 "for nvme_fcreq %p.\n",
 				 ret_val, pnvme_fcreq);
@@ -1588,8 +1612,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 		return;
 	}
 
-	lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
-			 "6138 Transport Abort NVME Request Issued for\n"
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
+			 "6138 Transport Abort NVME Request Issued for "
 			 "ox_id x%x on reqtag x%x\n",
 			 nvmereq_wqe->sli4_xritag,
 			 abts_buf->iotag);
@@ -1618,7 +1642,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
 	.local_priv_sz = sizeof(struct lpfc_nvme_lport),
 	.remote_priv_sz = sizeof(struct lpfc_nvme_rport),
 	.lsrqst_priv_sz = 0,
-	.fcprqst_priv_sz = 0,
+	.fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv),
 };
 
 /**
@@ -2049,7 +2073,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 		if (lpfc_test_rrq_active(phba, ndlp,
 					 lpfc_ncmd->cur_iocbq.sli4_lxritag))
 			continue;
-		list_del(&lpfc_ncmd->list);
+		list_del_init(&lpfc_ncmd->list);
 		found = 1;
 		break;
 	}
@@ -2064,7 +2088,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 			if (lpfc_test_rrq_active(
 				phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
 				continue;
-			list_del(&lpfc_ncmd->list);
+			list_del_init(&lpfc_ncmd->list);
 			found = 1;
 			break;
 		}
@@ -2092,6 +2116,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
 
 	lpfc_ncmd->nonsg_phys = 0;
 	if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+				"6310 XB release deferred for "
+				"ox_id x%x on reqtag x%x\n",
+				lpfc_ncmd->cur_iocbq.sli4_xritag,
+				lpfc_ncmd->cur_iocbq.iotag);
+
 		spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
 					iflag);
 		lpfc_ncmd->nvmeCmd = NULL;
@@ -2142,8 +2172,18 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
 	nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
 
-	/* For now need + 1 to get around NVME transport logic */
-	lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1;
+	/* Limit to LPFC_MAX_NVME_SEG_CNT.
+	 * For now need + 1 to get around NVME transport logic.
+	 */
+	if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT,
+				 "6300 Reducing sg segment cnt to %d\n",
+				 LPFC_MAX_NVME_SEG_CNT);
+		phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+	} else {
+		phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+	}
+	lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel;
 
 	/* localport is allocated from the stack, but the registration
@@ -2249,12 +2289,23 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 void
 lpfc_nvme_update_localport(struct lpfc_vport *vport)
 {
+#if (IS_ENABLED(CONFIG_NVME_FC))
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
 
 	localport = vport->localport;
+	if (!localport) {
+		lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
+				 "6710 Update NVME fail. No localport\n");
+		return;
+	}
 	lport = (struct lpfc_nvme_lport *)localport->private;
-
+	if (!lport) {
+		lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
+				 "6171 Update NVME fail. localP %p, No lport\n",
+				 localport);
+		return;
+	}
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6012 Update NVME lport %p did x%x\n",
 			 localport, vport->fc_myDID);
@@ -2268,7 +2319,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
 			 "6030 bound lport %p to DID x%06x\n",
 			 lport, localport->port_id);
-
+#endif
 }
 
 int
@@ -2409,6 +2460,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_nvme_rport *rport;
 	struct nvme_fc_remote_port *remoteport;
+	unsigned long wait_tmo;
 
 	localport = vport->localport;
 
@@ -2451,11 +2503,12 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		 * before proceeding.  This guarantees the transport and driver
 		 * have completed the unreg process.
 		 */
-		ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5);
+		wait_tmo = msecs_to_jiffies(5000);
+		ret = wait_for_completion_timeout(&rport->rport_unreg_done,
+						  wait_tmo);
 		if (ret == 0) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-					 "6169 Unreg nvme wait failed %d\n",
-					 ret);
+					 "6169 Unreg nvme wait timeout\n");
 		}
 	}
 	return;
@@ -2463,7 +2516,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
  input_err:
 #endif
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
-			 "6168: State error: lport %p, rport%p FCID x%06x\n",
+			 "6168 State error: lport %p, rport%p FCID x%06x\n",
 			 vport->localport, ndlp->rport, ndlp->nlp_DID);
 }
 
@@ -2494,7 +2547,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
 				 &phba->sli4_hba.lpfc_abts_nvme_buf_list,
 				 list) {
 		if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
-			list_del(&lpfc_ncmd->list);
+			list_del_init(&lpfc_ncmd->list);
 			lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
 			lpfc_ncmd->status = IOSTAT_SUCCESS;
 			spin_unlock(
@@ -2510,6 +2563,12 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
 					rxid, 1);
 				lpfc_sli4_abts_err_handler(phba, ndlp, axri);
 			}
+
+			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+					"6311 XRI Aborted xri x%x tag x%x "
+					"released\n",
+					xri, lpfc_ncmd->cur_iocbq.iotag);
+
 			lpfc_release_nvme_buf(phba, lpfc_ncmd);
 			if (rrq_empty)
 				lpfc_worker_wake_up(phba);
@@ -2518,4 +2577,8 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
 	}
 	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+			"6312 XRI Aborted xri x%x not found\n", xri);
+
 }
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 1347deb8dd6c..ec32f45daa66 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -21,12 +21,7 @@
  * included with this package.                                     *
  ********************************************************************/
 
-#define LPFC_NVME_MIN_SEGS		16
-#define LPFC_NVME_DEFAULT_SEGS		66	/* 256K IOs - 64 + 2 */
-#define LPFC_NVME_MAX_SEGS		510
-#define LPFC_NVMET_MIN_POSTBUF		16
-#define LPFC_NVMET_DEFAULT_POSTBUF	1024
-#define LPFC_NVMET_MAX_POSTBUF		4096
+#define LPFC_NVME_DEFAULT_SEGS		(64 + 1)	/* 256K IOs */
 #define LPFC_NVME_WQSIZE		256
 
 #define LPFC_NVME_ERSP_LEN		0x20
@@ -102,3 +97,7 @@ struct lpfc_nvme_buf {
 	uint64_t ts_data_nvme;
 #endif
 };
+
+struct lpfc_nvme_fcpreq_priv {
+	struct lpfc_nvme_buf *nvme_buf;
+};
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index acba1b67e505..94434e621c33 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -71,6 +71,26 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
 					   struct lpfc_nvmet_rcv_ctx *,
 					   uint32_t, uint16_t);
 
+void
+lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
+{
+	unsigned long iflag;
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+			"6313 NVMET Defer ctx release xri x%x flg x%x\n",
+			ctxp->oxid, ctxp->flag);
+
+	spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
+	if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
+		spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock,
+				       iflag);
+		return;
+	}
+	ctxp->flag |= LPFC_NVMET_CTX_RLS;
+	list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+	spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag);
+}
+
 /**
  * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response
  * @phba: Pointer to HBA context object.
@@ -139,6 +159,11 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
 		   struct lpfc_dmabuf *mp)
 {
 	if (ctxp) {
+		if (ctxp->flag)
+			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+				"6314 rq_post ctx xri x%x flag x%x\n",
+				ctxp->oxid, ctxp->flag);
+
 		if (ctxp->txrdy) {
 			pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
 				      ctxp->txrdy_phys);
@@ -337,39 +362,55 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 #endif
 
 	ctxp = cmdwqe->context2;
+	ctxp->flag &= ~LPFC_NVMET_IO_INP;
+
 	rsp = &ctxp->ctx.fcp_req;
 	op = rsp->op;
-	ctxp->flag &= ~LPFC_NVMET_IO_INP;
 
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
 	result = wcqe->parameter;
 
-	if (!phba->targetport)
-		goto out;
+	if (phba->targetport)
+		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	else
+		tgtp = NULL;
 
 	lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n",
 			 ctxp->oxid, op, status);
 
-	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (status) {
 		rsp->fcp_error = NVME_SC_DATA_XFER_ERROR;
 		rsp->transferred_length = 0;
-		atomic_inc(&tgtp->xmt_fcp_rsp_error);
+		if (tgtp)
+			atomic_inc(&tgtp->xmt_fcp_rsp_error);
+
+		/* pick up SLI4 exhange busy condition */
+		if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
+			ctxp->flag |= LPFC_NVMET_XBUSY;
+
+			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+					"6315 IO Cmpl XBUSY: xri x%x: %x/%x\n",
+					ctxp->oxid, status, result);
+		} else {
+			ctxp->flag &= ~LPFC_NVMET_XBUSY;
+		}
+
 	} else {
 		rsp->fcp_error = NVME_SC_SUCCESS;
 		if (op == NVMET_FCOP_RSP)
 			rsp->transferred_length = rsp->rsplen;
 		else
 			rsp->transferred_length = rsp->transfer_length;
-		atomic_inc(&tgtp->xmt_fcp_rsp_cmpl);
+		if (tgtp)
+			atomic_inc(&tgtp->xmt_fcp_rsp_cmpl);
 	}
 
-out:
 	if ((op == NVMET_FCOP_READDATA_RSP) ||
 	    (op == NVMET_FCOP_RSP)) {
 		/* Sanity check */
 		ctxp->state = LPFC_NVMET_STE_DONE;
 		ctxp->entry_cnt++;
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 		if (phba->ktime_on) {
 			if (rsp->op == NVMET_FCOP_READDATA_RSP) {
@@ -408,9 +449,7 @@ out:
 		if (phba->ktime_on)
 			lpfc_nvmet_ktime(phba, ctxp);
 #endif
-		/* Let Abort cmpl repost the context */
-		if (!(ctxp->flag & LPFC_NVMET_ABORT_OP))
-			lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+		/* lpfc_nvmet_xmt_fcp_release() will recycle the context */
 	} else {
 		ctxp->entry_cnt++;
 		start_clean = offsetof(struct lpfc_iocbq, wqe);
@@ -519,7 +558,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
 	struct lpfc_iocbq *nvmewqeq;
-	unsigned long iflags;
 	int rc;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -544,32 +582,12 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	}
 #endif
 
-	if (rsp->op == NVMET_FCOP_ABORT) {
-		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-				"6103 Abort op: oxri x%x %d cnt %d\n",
-				ctxp->oxid, ctxp->state, ctxp->entry_cnt);
-
-		lpfc_nvmeio_data(phba, "NVMET FCP ABRT: "
-				 "xri x%x state x%x cnt x%x\n",
-				 ctxp->oxid, ctxp->state, ctxp->entry_cnt);
-
-		atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
-		ctxp->entry_cnt++;
-		ctxp->flag |= LPFC_NVMET_ABORT_OP;
-		if (ctxp->flag & LPFC_NVMET_IO_INP)
-			lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-						       ctxp->oxid);
-		else
-			lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
-							 ctxp->oxid);
-		return 0;
-	}
-
 	/* Sanity check */
-	if (ctxp->state == LPFC_NVMET_STE_ABORT) {
+	if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) ||
+	    (ctxp->state == LPFC_NVMET_STE_ABORT)) {
 		atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6102 Bad state IO x%x aborted\n",
+				"6102 IO xri x%x aborted\n",
 				ctxp->oxid);
 		rc = -ENXIO;
 		goto aerr;
@@ -594,10 +612,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
 			 ctxp->oxid, rsp->op, rsp->rsplen);
 
-	/* For now we take hbalock */
-	spin_lock_irqsave(&phba->hbalock, iflags);
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
-	spin_unlock_irqrestore(&phba->hbalock, iflags);
 	if (rc == WQE_SUCCESS) {
 		ctxp->flag |= LPFC_NVMET_IO_INP;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -634,10 +649,79 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport)
 	complete(&tport->tport_unreg_done);
 }
 
+static void
+lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
+			 struct nvmefc_tgt_fcp_req *req)
+{
+	struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
+	struct lpfc_nvmet_rcv_ctx *ctxp =
+		container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+	struct lpfc_hba *phba = ctxp->phba;
+	unsigned long flags;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+			"6103 Abort op: oxri x%x flg x%x cnt %d\n",
+			ctxp->oxid, ctxp->flag, ctxp->entry_cnt);
+
+	lpfc_nvmeio_data(phba, "NVMET FCP ABRT: "
+			 "xri x%x flg x%x cnt x%x\n",
+			 ctxp->oxid, ctxp->flag, ctxp->entry_cnt);
+
+	atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
+	ctxp->entry_cnt++;
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
+
+	/* Since iaab/iaar are NOT set, we need to check
+	 * if the firmware is in process of aborting IO
+	 */
+	if (ctxp->flag & LPFC_NVMET_XBUSY) {
+		spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+		return;
+	}
+	ctxp->flag |= LPFC_NVMET_ABORT_OP;
+	if (ctxp->flag & LPFC_NVMET_IO_INP)
+		lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+					       ctxp->oxid);
+	else
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+						 ctxp->oxid);
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+}
+
+static void
+lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
+			   struct nvmefc_tgt_fcp_req *rsp)
+{
+	struct lpfc_nvmet_rcv_ctx *ctxp =
+		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+	struct lpfc_hba *phba = ctxp->phba;
+	unsigned long flags;
+	bool aborting = false;
+
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
+	if ((ctxp->flag & LPFC_NVMET_ABORT_OP) ||
+	    (ctxp->flag & LPFC_NVMET_XBUSY)) {
+		aborting = true;
+		/* let the abort path do the real release */
+		lpfc_nvmet_defer_release(phba, ctxp);
+	}
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
+	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid,
+			 ctxp->state, 0);
+
+	if (aborting)
+		return;
+
+	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+}
+
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
 	.targetport_delete = lpfc_nvmet_targetport_delete,
 	.xmt_ls_rsp     = lpfc_nvmet_xmt_ls_rsp,
 	.fcp_op         = lpfc_nvmet_xmt_fcp_op,
+	.fcp_abort      = lpfc_nvmet_xmt_fcp_abort,
+	.fcp_req_release = lpfc_nvmet_xmt_fcp_release,
 
 	.max_hw_queues  = 1,
 	.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
@@ -666,10 +750,23 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 	pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
 	pinfo.port_id = vport->fc_myDID;
 
+	/* Limit to LPFC_MAX_NVME_SEG_CNT.
+	 * For now need + 1 to get around NVME transport logic.
+	 */
+	if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
+				"6400 Reducing sg segment cnt to %d\n",
+				LPFC_MAX_NVME_SEG_CNT);
+		phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
+	} else {
+		phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
+	}
+	lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
-	lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt;
 	lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
-					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
+					   NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED |
+					   NVMET_FCTGTFEAT_CMD_IN_ISR |
+					   NVMET_FCTGTFEAT_OPDONE_IN_ISR;
 
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
@@ -750,7 +847,120 @@ void
 lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 			    struct sli4_wcqe_xri_aborted *axri)
 {
-	/* TODO: work in progress */
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+	struct lpfc_nodelist *ndlp;
+	unsigned long iflag = 0;
+	int rrq_empty = 0;
+	bool released = false;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+			"6317 XB aborted xri x%x rxid x%x\n", xri, rxid);
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+		return;
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	list_for_each_entry_safe(ctxp, next_ctxp,
+				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				 list) {
+		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+			continue;
+
+		/* Check if we already received a free context call
+		 * and we have completed processing an abort situation.
+		 */
+		if (ctxp->flag & LPFC_NVMET_CTX_RLS &&
+		    !(ctxp->flag & LPFC_NVMET_ABORT_OP)) {
+			list_del(&ctxp->list);
+			released = true;
+		}
+		ctxp->flag &= ~LPFC_NVMET_XBUSY;
+		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+
+		rrq_empty = list_empty(&phba->active_rrq_list);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		ndlp = lpfc_findnode_did(phba->pport, ctxp->sid);
+		if (ndlp && NLP_CHK_NODE_ACT(ndlp) &&
+		    (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
+		     ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
+			lpfc_set_rrq_active(phba, ndlp,
+				ctxp->rqb_buffer->sglq->sli4_lxritag,
+				rxid, 1);
+			lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+		}
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+				"6318 XB aborted %x flg x%x (%x)\n",
+				ctxp->oxid, ctxp->flag, released);
+		if (released)
+			lpfc_nvmet_rq_post(phba, ctxp,
+					   &ctxp->rqb_buffer->hbuf);
+		if (rrq_empty)
+			lpfc_worker_wake_up(phba);
+		return;
+	}
+	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
+
+int
+lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
+			   struct fc_frame_header *fc_hdr)
+
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+	struct nvmefc_tgt_fcp_req *rsp;
+	uint16_t xri;
+	unsigned long iflag = 0;
+
+	xri = be16_to_cpu(fc_hdr->fh_ox_id);
+
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	list_for_each_entry_safe(ctxp, next_ctxp,
+				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+				 list) {
+		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+			continue;
+
+		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+		spin_lock_irqsave(&ctxp->ctxlock, iflag);
+		ctxp->flag |= LPFC_NVMET_ABTS_RCV;
+		spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
+
+		lpfc_nvmeio_data(phba,
+			"NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
+			xri, smp_processor_id(), 0);
+
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+				"6319 NVMET Rcv ABTS:acc xri x%x\n", xri);
+
+		rsp = &ctxp->ctx.fcp_req;
+		nvmet_fc_rcv_fcp_abort(phba->targetport, rsp);
+
+		/* Respond with BA_ACC accordingly */
+		lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1);
+		return 0;
+	}
+	spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
+			 xri, smp_processor_id(), 1);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+			"6320 NVMET Rcv ABTS:rjt xri x%x\n", xri);
+
+	/* Respond with BA_RJT accordingly */
+	lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0);
+#endif
+	return 0;
 }
 
 void
@@ -940,6 +1150,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	ctxp->rqb_buffer = nvmebuf;
 	ctxp->entry_cnt = 1;
 	ctxp->flag = 0;
+	spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	if (phba->ktime_on) {
@@ -962,8 +1173,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	}
 #endif
 
-	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d from %06x\n",
-			 oxid, size, sid);
+	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
+			 oxid, size, smp_processor_id());
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_in);
 	/*
@@ -1237,11 +1448,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 		return NULL;
 	}
 
-	if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) {
+	if (rsp->sg_cnt > phba->cfg_nvme_seg_cnt) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 				"6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: "
-				"NPORT x%x oxid:x%x\n",
-				ctxp->sid, ctxp->oxid);
+				"NPORT x%x oxid:x%x cnt %d\n",
+				ctxp->sid, ctxp->oxid, phba->cfg_nvme_seg_cnt);
 		return NULL;
 	}
 
@@ -1593,6 +1804,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	uint32_t status, result;
+	unsigned long flags;
+	bool released = false;
 
 	ctxp = cmdwqe->context2;
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -1601,21 +1814,46 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	atomic_inc(&tgtp->xmt_abort_cmpl);
 
+	ctxp->state = LPFC_NVMET_STE_DONE;
+
+	/* Check if we already received a free context call
+	 * and we have completed processing an abort situation.
+	 */
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
+	if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
+	    !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+		list_del(&ctxp->list);
+		released = true;
+	}
+	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-			"6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n",
-			ctxp->oxid, wcqe->word0, wcqe->total_data_placed,
+			"6165 ABORT cmpl: xri x%x flg x%x (%d) "
+			"WCQE: %08x %08x %08x %08x\n",
+			ctxp->oxid, ctxp->flag, released,
+			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
 
-	ctxp->state = LPFC_NVMET_STE_DONE;
-	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+	/*
+	 * if transport has released ctx, then can reuse it. Otherwise,
+	 * will be recycled by transport release call.
+	 */
+	if (released)
+		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
 
 	cmdwqe->context2 = NULL;
 	cmdwqe->context3 = NULL;
 	lpfc_sli_release_iocbq(phba, cmdwqe);
+
+	/* Since iaab/iaar are NOT set, there is no work left.
+	 * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+	 * should have been called already.
+	 */
 }
 
 /**
- * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS
+ * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS
  * @phba: Pointer to HBA context object.
  * @cmdwqe: Pointer to driver command WQE object.
  * @wcqe: Pointer to driver response CQE object.
@@ -1625,12 +1863,14 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
  * The function frees memory resources used for the NVME commands.
  **/
 static void
-lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
-			     struct lpfc_wcqe_complete *wcqe)
+lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+			       struct lpfc_wcqe_complete *wcqe)
 {
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
+	unsigned long flags;
 	uint32_t status, result;
+	bool released = false;
 
 	ctxp = cmdwqe->context2;
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -1639,23 +1879,55 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	atomic_inc(&tgtp->xmt_abort_cmpl);
 
+	if (!ctxp) {
+		/* if context is clear, related io alrady complete */
+		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+				"6070 ABTS cmpl: WCQE: %08x %08x %08x %08x\n",
+				wcqe->word0, wcqe->total_data_placed,
+				result, wcqe->word3);
+		return;
+	}
+
+	/* Sanity check */
+	if (ctxp->state != LPFC_NVMET_STE_ABORT) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+				"6112 ABTS Wrong state:%d oxid x%x\n",
+				ctxp->state, ctxp->oxid);
+	}
+
+	/* Check if we already received a free context call
+	 * and we have completed processing an abort situation.
+	 */
+	ctxp->state = LPFC_NVMET_STE_DONE;
+	spin_lock_irqsave(&ctxp->ctxlock, flags);
+	if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
+	    !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+		list_del(&ctxp->list);
+		released = true;
+	}
+	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-			"6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n",
-			ctxp, wcqe->word0, wcqe->total_data_placed,
+			"6316 ABTS cmpl xri x%x flg x%x (%x) "
+			"WCQE: %08x %08x %08x %08x\n",
+			ctxp->oxid, ctxp->flag, released,
+			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
-
-	if (ctxp) {
-		/* Sanity check */
-		if (ctxp->state != LPFC_NVMET_STE_ABORT) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-					"6112 ABORT Wrong state:%d oxid x%x\n",
-					ctxp->state, ctxp->oxid);
-		}
-		ctxp->state = LPFC_NVMET_STE_DONE;
+	/*
+	 * if transport has released ctx, then can reuse it. Otherwise,
+	 * will be recycled by transport release call.
+	 */
+	if (released)
 		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
-		cmdwqe->context2 = NULL;
-		cmdwqe->context3 = NULL;
-	}
+
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
+
+	/* Since iaab/iaar are NOT set, there is no work left.
+	 * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+	 * should have been called already.
+	 */
 }
 
 /**
@@ -1708,10 +1980,14 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 	struct lpfc_nodelist *ndlp;
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-			"6067 Abort: sid %x xri x%x/x%x\n",
+			"6067 ABTS: sid %x xri x%x/x%x\n",
 			sid, xri, ctxp->wqeq->sli4_xritag);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+	if (!ctxp->wqeq) {
+		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq->hba_wqidx = 0;
+	}
 
 	ndlp = lpfc_findnode_did(phba->pport, sid);
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
@@ -1817,10 +2093,11 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	    (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
 		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
-				"6160 Drop ABTS - wrong NDLP state x%x.\n",
+				"6160 Drop ABORT - wrong NDLP state x%x.\n",
 				(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
 		/* No failure to an ABTS request. */
+		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 		return 0;
 	}
 
@@ -1828,9 +2105,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
 	if (!ctxp->abort_wqeq) {
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
-				"6161 Abort failed: No wqeqs: "
+				"6161 ABORT failed: No wqeqs: "
 				"xri: x%x\n", ctxp->oxid);
 		/* No failure to an ABTS request. */
+		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 		return 0;
 	}
 	abts_wqeq = ctxp->abort_wqeq;
@@ -1838,8 +2116,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	ctxp->state = LPFC_NVMET_STE_ABORT;
 
 	/* Announce entry to new IO submit field. */
-	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
-			"6162 Abort Request to rport DID x%06x "
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+			"6162 ABORT Request to rport DID x%06x "
 			"for xri x%x x%x\n",
 			ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag);
 
@@ -1855,6 +2133,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				"NVME Req now. hba_flag x%x oxid x%x\n",
 				phba->hba_flag, ctxp->oxid);
 		lpfc_sli_release_iocbq(phba, abts_wqeq);
+		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 		return 0;
 	}
 
@@ -1866,6 +2145,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 				"still pending on oxid x%x\n",
 				ctxp->oxid);
 		lpfc_sli_release_iocbq(phba, abts_wqeq);
+		ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 		return 0;
 	}
 
@@ -1913,9 +2193,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	if (rc == WQE_SUCCESS)
 		return 0;
 
+	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	lpfc_sli_release_iocbq(phba, abts_wqeq);
-	lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-			"6166 Failed abts issue_wqe with status x%x "
+	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+			"6166 Failed ABORT issue_wqe with status x%x "
 			"for oxid x%x.\n",
 			rc, ctxp->oxid);
 	return 1;
@@ -1944,8 +2225,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	spin_lock_irqsave(&phba->hbalock, flags);
 	abts_wqeq = ctxp->wqeq;
-	abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp;
-	abts_wqeq->iocb_cmpl = 0;
+	abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp;
+	abts_wqeq->iocb_cmpl = NULL;
 	abts_wqeq->iocb_flag |= LPFC_IO_NVMET;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
@@ -1955,7 +2236,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	}
 
 aerr:
-	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 			"6135 Failed to Issue ABTS for oxid x%x. Status x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index ca96f05c1604..128759fe6650 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -21,9 +21,7 @@
  * included with this package.                                     *
  ********************************************************************/
 
-#define LPFC_NVMET_MIN_SEGS		16
-#define LPFC_NVMET_DEFAULT_SEGS		64	/* 256K IOs */
-#define LPFC_NVMET_MAX_SEGS		510
+#define LPFC_NVMET_DEFAULT_SEGS		(64 + 1)	/* 256K IOs */
 #define LPFC_NVMET_SUCCESS_LEN	12
 
 /* Used for NVME Target */
@@ -77,10 +75,12 @@ struct lpfc_nvmet_rcv_ctx {
 		struct nvmefc_tgt_ls_req ls_req;
 		struct nvmefc_tgt_fcp_req fcp_req;
 	} ctx;
+	struct list_head list;
 	struct lpfc_hba *phba;
 	struct lpfc_iocbq *wqeq;
 	struct lpfc_iocbq *abort_wqeq;
 	dma_addr_t txrdy_phys;
+	spinlock_t ctxlock; /* protect flag access */
 	uint32_t *txrdy;
 	uint32_t sid;
 	uint32_t offset;
@@ -97,8 +97,11 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_STE_RSP		4
 #define LPFC_NVMET_STE_DONE		5
 	uint16_t flag;
-#define LPFC_NVMET_IO_INP		1
-#define LPFC_NVMET_ABORT_OP		2
+#define LPFC_NVMET_IO_INP		0x1  /* IO is in progress on exchange */
+#define LPFC_NVMET_ABORT_OP		0x2  /* Abort WQE issued on exchange */
+#define LPFC_NVMET_XBUSY		0x4  /* XB bit set on IO cmpl */
+#define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
+#define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 	struct rqb_dmabuf *rqb_buffer;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1c9fa45df7eb..cf19f4976f5f 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -6338,7 +6338,7 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type,
 }
 
 /**
- * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block
+ * lpfc_sli4_repost_sgl_list - Repost the buffers sgl pages as block
  * @phba: pointer to lpfc hba data structure.
  * @pring: Pointer to driver SLI ring object.
  * @sgl_list: linked link of sgl buffers to post
@@ -13758,7 +13758,10 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
 		lpfc_free_rq_buffer(queue->phba, queue);
 		kfree(queue->rqbp);
 	}
-	kfree(queue->pring);
+
+	if (!list_empty(&queue->wq_list))
+		list_del(&queue->wq_list);
+
 	kfree(queue);
 	return;
 }
@@ -14738,6 +14741,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	case LPFC_Q_CREATE_VERSION_1:
 		bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
 		       wq->entry_count);
+		bf_set(lpfc_mbox_hdr_version, &shdr->request,
+		       LPFC_Q_CREATE_VERSION_1);
+
 		switch (wq->entry_size) {
 		default:
 		case 64:
@@ -15561,6 +15567,8 @@ lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq)
 	}
 	/* Remove wq from any list */
 	list_del_init(&wq->list);
+	kfree(wq->pring);
+	wq->pring = NULL;
 	mempool_free(mbox, wq->phba->mbox_mem_pool);
 	return status;
 }
@@ -16513,7 +16521,7 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba,
  * This function sends a basic response to a previous unsol sequence abort
  * event after aborting the sequence handling.
  **/
-static void
+void
 lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
 			struct fc_frame_header *fc_hdr, bool aborted)
 {
@@ -16534,14 +16542,13 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
 
 	ndlp = lpfc_findnode_did(vport, sid);
 	if (!ndlp) {
-		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		ndlp = lpfc_nlp_init(vport, sid);
 		if (!ndlp) {
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
 					 "1268 Failed to allocate ndlp for "
 					 "oxid:x%x SID:x%x\n", oxid, sid);
 			return;
 		}
-		lpfc_nlp_init(vport, ndlp, sid);
 		/* Put ndlp onto pport node list */
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -16690,6 +16697,11 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport,
 	}
 	lpfc_in_buf_free(phba, &dmabuf->dbuf);
 
+	if (phba->nvmet_support) {
+		lpfc_nvmet_rcv_unsol_abort(vport, &fc_hdr);
+		return;
+	}
+
 	/* Respond with BA_ACC or BA_RJT accordingly */
 	lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted);
 }
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 710458cf11d6..da46471337c8 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -620,7 +620,7 @@ struct lpfc_sli4_hba {
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	struct list_head lpfc_nvmet_sgl_list;
-	struct list_head lpfc_abts_nvmet_sgl_list;
+	struct list_head lpfc_abts_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
 	struct lpfc_sglq **lpfc_sglq_active_list;
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index d4e95e28f4e3..1c26dc67151b 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.10"
+#define LPFC_DRIVER_VERSION "11.2.0.12"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 9a0339dbc024..c714482bf4c5 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -738,10 +738,9 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
 		ndlp = lpfc_findnode_did(vport, Fabric_DID);
 		if (!ndlp) {
 			/* Cannot find existing Fabric ndlp, allocate one */
-			ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+			ndlp = lpfc_nlp_init(vport, Fabric_DID);
 			if (!ndlp)
 				goto skip_logo;
-			lpfc_nlp_init(vport, ndlp, Fabric_DID);
 			/* Indicate free memory when release */
 			NLP_SET_FREE_REQ(ndlp);
 		} else {
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 6903f03c88af..8a1b94816419 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -477,7 +477,7 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 			     int error)
 {
 	or->async_error = error;
-	or->req_errors = req->errors ? : error;
+	or->req_errors = scsi_req(req)->result ? : error;
 	or->sense_len = scsi_req(req)->sense_len;
 	if (or->sense_len)
 		memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,7 +489,10 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 
 int osd_execute_request(struct osd_request *or)
 {
-	int error = blk_execute_rq(or->request->q, NULL, or->request, 0);
+	int error;
+
+	blk_execute_rq(or->request->q, NULL, or->request, 0);
+	error = scsi_req(or->request)->result ? -EIO : 0;
 
 	_set_error_resid(or, or->request, error);
 	return error;
@@ -1602,7 +1605,7 @@ static int _init_blk_request(struct osd_request *or,
 	req->rq_flags |= RQF_QUIET;
 
 	req->timeout = or->timeout;
-	req->retries = or->retries;
+	scsi_req(req)->retries = or->retries;
 
 	if (has_out) {
 		or->out.req = req;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index c47f4b349bac..67cbed92f07d 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -327,7 +327,7 @@ static void osst_end_async(struct request *req, int update)
 	struct osst_tape *STp = SRpnt->stp;
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
 
-	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
+	STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result;
 #if DEBUG
 	STp->write_pending = 0;
 #endif
@@ -414,7 +414,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 	memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
 	memcpy(rq->cmd, cmd, rq->cmd_len);
 	req->timeout = timeout;
-	req->retries = retries;
+	rq->retries = retries;
 	req->end_io_data = SRpnt;
 
 	blk_execute_rq_nowait(req->q, NULL, req, 1, osst_end_async);
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 84c9098cc089..b6e40fd4c3c1 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -2553,13 +2553,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
 						ql_log(ql_log_warn, vha, 0x7089,
 						    "mbx abort_command "
 						    "failed.\n");
-						bsg_job->req->errors =
+						scsi_req(bsg_job->req)->result =
 						bsg_reply->result = -EIO;
 					} else {
 						ql_dbg(ql_dbg_user, vha, 0x708a,
 						    "mbx abort_command "
 						    "success.\n");
-						bsg_job->req->errors =
+						scsi_req(bsg_job->req)->result =
 						bsg_reply->result = 0;
 					}
 					spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -2570,7 +2570,7 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n");
-	bsg_job->req->errors = bsg_reply->result = -ENXIO;
+	scsi_req(bsg_job->req)->result = bsg_reply->result = -ENXIO;
 	return 0;
 
 done:
diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
new file mode 100644
index 000000000000..a97c9507103d
--- /dev/null
+++ b/drivers/scsi/scsi_debugfs.c
@@ -0,0 +1,13 @@
+#include <linux/seq_file.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+#include "scsi_debugfs.h"
+
+void scsi_show_rq(struct seq_file *m, struct request *rq)
+{
+	struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req);
+	char buf[80];
+
+	__scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len);
+	seq_printf(m, ", .cmd=%s", buf);
+}
diff --git a/drivers/scsi/scsi_debugfs.h b/drivers/scsi/scsi_debugfs.h
new file mode 100644
index 000000000000..951b043e82d0
--- /dev/null
+++ b/drivers/scsi/scsi_debugfs.h
@@ -0,0 +1,4 @@
+struct request;
+struct seq_file;
+
+void scsi_show_rq(struct seq_file *m, struct request *rq);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f2cafae150bc..2db412dd4b44 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 
 	req->rq_flags |= RQF_QUIET;
 	req->timeout = 10 * HZ;
-	req->retries = 5;
+	rq->retries = 5;
 
 	blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done);
 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e5a2d590a104..1c3e87d6c48f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -34,6 +34,7 @@
 
 #include <trace/events/scsi.h>
 
+#include "scsi_debugfs.h"
 #include "scsi_priv.h"
 #include "scsi_logging.h"
 
@@ -229,8 +230,8 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  * @rq_flags:	flags for ->rq_flags
  * @resid:	optional residual length
  *
- * returns the req->errors value which is the scsi_cmnd result
- * field.
+ * Returns the scsi_cmnd result field if a command was executed, or a negative
+ * Linux error code if we didn't get that far.
  */
 int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		 int data_direction, void *buffer, unsigned bufflen,
@@ -256,7 +257,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 
 	rq->cmd_len = COMMAND_SIZE(cmd[0]);
 	memcpy(rq->cmd, cmd, rq->cmd_len);
-	req->retries = retries;
+	rq->retries = retries;
 	req->timeout = timeout;
 	req->cmd_flags |= flags;
 	req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
@@ -281,7 +282,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
 	if (sshdr)
 		scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
-	ret = req->errors;
+	ret = rq->result;
  out:
 	blk_put_request(req);
 
@@ -797,8 +798,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		/*
 		 * __scsi_error_from_host_byte may have reset the host_byte
 		 */
-		req->errors = cmd->result;
-
+		scsi_req(req)->result = cmd->result;
 		scsi_req(req)->resid_len = scsi_get_resid(cmd);
 
 		if (scsi_bidi_cmnd(cmd)) {
@@ -835,7 +835,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	/*
 	 * Recovered errors need reporting, but they're always treated as
 	 * success, so fiddle the result code here.  For passthrough requests
-	 * we already took a copy of the original into rq->errors which
+	 * we already took a copy of the original into sreq->result which
 	 * is what gets returned to the user
 	 */
 	if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
@@ -1061,10 +1061,10 @@ int scsi_init_io(struct scsi_cmnd *cmd)
 	struct scsi_device *sdev = cmd->device;
 	struct request *rq = cmd->request;
 	bool is_mq = (rq->mq_ctx != NULL);
-	int error;
+	int error = BLKPREP_KILL;
 
 	if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
-		return -EINVAL;
+		goto err_exit;
 
 	error = scsi_init_sgtable(rq, &cmd->sdb);
 	if (error)
@@ -1177,7 +1177,7 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
 	cmd->cmd_len = scsi_req(req)->cmd_len;
 	cmd->cmnd = scsi_req(req)->cmd;
 	cmd->transfersize = blk_rq_bytes(req);
-	cmd->allowed = req->retries;
+	cmd->allowed = scsi_req(req)->retries;
 	return BLKPREP_OK;
 }
 
@@ -1281,7 +1281,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 	switch (ret) {
 	case BLKPREP_KILL:
 	case BLKPREP_INVALID:
-		req->errors = DID_NO_CONNECT << 16;
+		scsi_req(req)->result = DID_NO_CONNECT << 16;
 		/* release the command and kill it */
 		if (req->special) {
 			struct scsi_cmnd *cmd = req->special;
@@ -1905,7 +1905,7 @@ static int scsi_mq_prep_fn(struct request *req)
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
 	trace_scsi_dispatch_cmd_done(cmd);
-	blk_mq_complete_request(cmd->request, cmd->request->errors);
+	blk_mq_complete_request(cmd->request);
 }
 
 static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2154,10 +2154,13 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	return q;
 }
 
-static struct blk_mq_ops scsi_mq_ops = {
+static const struct blk_mq_ops scsi_mq_ops = {
 	.queue_rq	= scsi_queue_rq,
 	.complete	= scsi_softirq_done,
 	.timeout	= scsi_timeout,
+#ifdef CONFIG_BLK_DEBUG_FS
+	.show_rq	= scsi_show_rq,
+#endif
 	.init_request	= scsi_init_request,
 	.exit_request	= scsi_exit_request,
 	.map_queues	= scsi_map_queues,
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index cdbb293aca08..9fdbd50c31b4 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -184,9 +184,9 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 				blk_rq_bytes(req->next_rq);
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
 		ret = handler(shost, rphy, req);
-		req->errors = ret;
+		scsi_req(req)->result = ret;
 
-		blk_end_request_all(req, ret);
+		blk_end_request_all(req, 0);
 
 		spin_lock_irq(q->queue_lock);
 	}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 35ad5e8a31ab..0dc95e102e69 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -418,6 +418,46 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(provisioning_mode);
 
+static const char *zeroing_mode[] = {
+	[SD_ZERO_WRITE]		= "write",
+	[SD_ZERO_WS]		= "writesame",
+	[SD_ZERO_WS16_UNMAP]	= "writesame_16_unmap",
+	[SD_ZERO_WS10_UNMAP]	= "writesame_10_unmap",
+};
+
+static ssize_t
+zeroing_mode_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	return snprintf(buf, 20, "%s\n", zeroing_mode[sdkp->zeroing_mode]);
+}
+
+static ssize_t
+zeroing_mode_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (!strncmp(buf, zeroing_mode[SD_ZERO_WRITE], 20))
+		sdkp->zeroing_mode = SD_ZERO_WRITE;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS16_UNMAP], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+	else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS10_UNMAP], 20))
+		sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+	else
+		return -EINVAL;
+
+	return count;
+}
+static DEVICE_ATTR_RW(zeroing_mode);
+
 static ssize_t
 max_medium_access_timeouts_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -496,6 +536,7 @@ static struct attribute *sd_disk_attrs[] = {
 	&dev_attr_app_tag_own.attr,
 	&dev_attr_thin_provisioning.attr,
 	&dev_attr_provisioning_mode.attr,
+	&dev_attr_zeroing_mode.attr,
 	&dev_attr_max_write_same_blocks.attr,
 	&dev_attr_max_medium_access_timeouts.attr,
 	NULL,
@@ -644,26 +685,11 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	unsigned int logical_block_size = sdkp->device->sector_size;
 	unsigned int max_blocks = 0;
 
-	q->limits.discard_zeroes_data = 0;
-
-	/*
-	 * When LBPRZ is reported, discard alignment and granularity
-	 * must be fixed to the logical block size. Otherwise the block
-	 * layer will drop misaligned portions of the request which can
-	 * lead to data corruption. If LBPRZ is not set, we honor the
-	 * device preference.
-	 */
-	if (sdkp->lbprz) {
-		q->limits.discard_alignment = 0;
-		q->limits.discard_granularity = logical_block_size;
-	} else {
-		q->limits.discard_alignment = sdkp->unmap_alignment *
-			logical_block_size;
-		q->limits.discard_granularity =
-			max(sdkp->physical_block_size,
-			    sdkp->unmap_granularity * logical_block_size);
-	}
-
+	q->limits.discard_alignment =
+		sdkp->unmap_alignment * logical_block_size;
+	q->limits.discard_granularity =
+		max(sdkp->physical_block_size,
+		    sdkp->unmap_granularity * logical_block_size);
 	sdkp->provisioning_mode = mode;
 
 	switch (mode) {
@@ -681,19 +707,16 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	case SD_LBP_WS16:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS16_BLOCKS);
-		q->limits.discard_zeroes_data = sdkp->lbprz;
 		break;
 
 	case SD_LBP_WS10:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS10_BLOCKS);
-		q->limits.discard_zeroes_data = sdkp->lbprz;
 		break;
 
 	case SD_LBP_ZERO:
 		max_blocks = min_not_zero(sdkp->max_ws_blocks,
 					  (u32)SD_MAX_WS10_BLOCKS);
-		q->limits.discard_zeroes_data = 1;
 		break;
 	}
 
@@ -701,93 +724,122 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @sdp: scsi device to operate on
- * @rq: Request to prepare
- *
- * Will issue either UNMAP or WRITE SAME(16) depending on preference
- * indicated by target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
 {
-	struct request *rq = cmd->request;
 	struct scsi_device *sdp = cmd->device;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-	sector_t sector = blk_rq_pos(rq);
-	unsigned int nr_sectors = blk_rq_sectors(rq);
-	unsigned int len;
-	int ret;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	unsigned int data_len = 24;
 	char *buf;
-	struct page *page;
-
-	sector >>= ilog2(sdp->sector_size) - 9;
-	nr_sectors >>= ilog2(sdp->sector_size) - 9;
 
-	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
-	if (!page)
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
 		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	switch (sdkp->provisioning_mode) {
-	case SD_LBP_UNMAP:
-		buf = page_address(page);
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = UNMAP;
+	cmd->cmnd[8] = 24;
 
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = UNMAP;
-		cmd->cmnd[8] = 24;
+	buf = page_address(rq->special_vec.bv_page);
+	put_unaligned_be16(6 + 16, &buf[0]);
+	put_unaligned_be16(16, &buf[2]);
+	put_unaligned_be64(sector, &buf[8]);
+	put_unaligned_be32(nr_sectors, &buf[16]);
 
-		put_unaligned_be16(6 + 16, &buf[0]);
-		put_unaligned_be16(16, &buf[2]);
-		put_unaligned_be64(sector, &buf[8]);
-		put_unaligned_be32(nr_sectors, &buf[16]);
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = SD_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
 
-		len = 24;
-		break;
+	return scsi_init_io(cmd);
+}
 
-	case SD_LBP_WS16:
-		cmd->cmd_len = 16;
-		cmd->cmnd[0] = WRITE_SAME_16;
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
+
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	cmd->cmd_len = 16;
+	cmd->cmnd[0] = WRITE_SAME_16;
+	if (unmap)
 		cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be64(sector, &cmd->cmnd[2]);
-		put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+	put_unaligned_be64(sector, &cmd->cmnd[2]);
+	put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
 
-		len = sdkp->device->sector_size;
-		break;
+	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
 
-	case SD_LBP_WS10:
-	case SD_LBP_ZERO:
-		cmd->cmd_len = 10;
-		cmd->cmnd[0] = WRITE_SAME;
-		if (sdkp->provisioning_mode == SD_LBP_WS10)
-			cmd->cmnd[1] = 0x8; /* UNMAP */
-		put_unaligned_be32(sector, &cmd->cmnd[2]);
-		put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+	return scsi_init_io(cmd);
+}
 
-		len = sdkp->device->sector_size;
-		break;
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+	struct scsi_device *sdp = cmd->device;
+	struct request *rq = cmd->request;
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 data_len = sdp->sector_size;
 
-	default:
-		ret = BLKPREP_INVALID;
-		goto out;
-	}
+	rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!rq->special_vec.bv_page)
+		return BLKPREP_DEFER;
+	rq->special_vec.bv_offset = 0;
+	rq->special_vec.bv_len = data_len;
+	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	rq->timeout = SD_TIMEOUT;
+	cmd->cmd_len = 10;
+	cmd->cmnd[0] = WRITE_SAME;
+	if (unmap)
+		cmd->cmnd[1] = 0x8; /* UNMAP */
+	put_unaligned_be32(sector, &cmd->cmnd[2]);
+	put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
 
-	cmd->transfersize = len;
 	cmd->allowed = SD_MAX_RETRIES;
+	cmd->transfersize = data_len;
+	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+	scsi_req(rq)->resid_len = data_len;
 
-	rq->special_vec.bv_page = page;
-	rq->special_vec.bv_offset = 0;
-	rq->special_vec.bv_len = len;
+	return scsi_init_io(cmd);
+}
 
-	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
-	scsi_req(rq)->resid_len = len;
+static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
+{
+	struct request *rq = cmd->request;
+	struct scsi_device *sdp = cmd->device;
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+
+	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
+		switch (sdkp->zeroing_mode) {
+		case SD_ZERO_WS16_UNMAP:
+			return sd_setup_write_same16_cmnd(cmd, true);
+		case SD_ZERO_WS10_UNMAP:
+			return sd_setup_write_same10_cmnd(cmd, true);
+		}
+	}
 
-	ret = scsi_init_io(cmd);
-out:
-	if (ret != BLKPREP_OK)
-		__free_page(page);
-	return ret;
+	if (sdp->no_write_same)
+		return BLKPREP_INVALID;
+	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
+		return sd_setup_write_same16_cmnd(cmd, false);
+	return sd_setup_write_same10_cmnd(cmd, false);
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -816,9 +868,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
 		sdkp->max_ws_blocks = 0;
 	}
 
+	if (sdkp->lbprz && sdkp->lbpws)
+		sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+	else if (sdkp->lbprz && sdkp->lbpws10)
+		sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+	else if (sdkp->max_ws_blocks)
+		sdkp->zeroing_mode = SD_ZERO_WS;
+	else
+		sdkp->zeroing_mode = SD_ZERO_WRITE;
+
 out:
 	blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks *
 					 (logical_block_size >> 9));
+	blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
+					 (logical_block_size >> 9));
 }
 
 /**
@@ -1155,7 +1218,20 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
-		return sd_setup_discard_cmnd(cmd);
+		switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+		case SD_LBP_UNMAP:
+			return sd_setup_unmap_cmnd(cmd);
+		case SD_LBP_WS16:
+			return sd_setup_write_same16_cmnd(cmd, true);
+		case SD_LBP_WS10:
+			return sd_setup_write_same10_cmnd(cmd, true);
+		case SD_LBP_ZERO:
+			return sd_setup_write_same10_cmnd(cmd, false);
+		default:
+			return BLKPREP_INVALID;
+		}
+	case REQ_OP_WRITE_ZEROES:
+		return sd_setup_write_zeroes_cmnd(cmd);
 	case REQ_OP_WRITE_SAME:
 		return sd_setup_write_same_cmnd(cmd);
 	case REQ_OP_FLUSH:
@@ -1795,6 +1871,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 	case REQ_OP_WRITE_SAME:
 	case REQ_OP_ZONE_RESET:
 		if (!result) {
@@ -2768,7 +2845,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 				sd_config_discard(sdkp, SD_LBP_WS16);
 
 		} else {	/* LBP VPD page tells us what to use */
-			if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz)
+			if (sdkp->lbpu && sdkp->max_unmap_blocks)
 				sd_config_discard(sdkp, SD_LBP_UNMAP);
 			else if (sdkp->lbpws)
 				sd_config_discard(sdkp, SD_LBP_WS16);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4dac35e96a75..a2c4b5c35379 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -59,6 +59,13 @@ enum {
 	SD_LBP_DISABLE,		/* Discard disabled due to failed cmd */
 };
 
+enum {
+	SD_ZERO_WRITE = 0,	/* Use WRITE(10/16) command */
+	SD_ZERO_WS,		/* Use WRITE SAME(10/16) command */
+	SD_ZERO_WS16_UNMAP,	/* Use WRITE SAME(16) with UNMAP */
+	SD_ZERO_WS10_UNMAP,	/* Use WRITE SAME(10) with UNMAP */
+};
+
 struct scsi_disk {
 	struct scsi_driver *driver;	/* always &sd_template */
 	struct scsi_device *device;
@@ -89,6 +96,7 @@ struct scsi_disk {
 	u8		write_prot;
 	u8		protection_type;/* Data Integrity Field */
 	u8		provisioning_mode;
+	u8		zeroing_mode;
 	unsigned	ATO : 1;	/* state of disk ATO bit */
 	unsigned	cache_override : 1; /* temp override of WCE,RCD */
 	unsigned	WCE : 1;	/* state of disk WCE bit */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 92620c8ea8ad..1994f7799fce 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -329,6 +329,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd,
 
 	switch (req_op(rq)) {
 	case REQ_OP_WRITE:
+	case REQ_OP_WRITE_ZEROES:
 	case REQ_OP_WRITE_SAME:
 	case REQ_OP_ZONE_RESET:
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 225abaad4d1c..504504beaa5e 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1300,7 +1300,7 @@ sg_rq_end_io(struct request *rq, int uptodate)
 		pr_info("%s: device detaching\n", __func__);
 
 	sense = req->sense;
-	result = rq->errors;
+	result = req->result;
 	resid = req->resid_len;
 
 	SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp,
@@ -1718,7 +1718,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 
 	srp->rq = rq;
 	rq->end_io_data = srp;
-	rq->retries = SG_DEFAULT_RETRIES;
+	req->retries = SG_DEFAULT_RETRIES;
 
 	if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
 		return 0;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e5ef78a6848e..1ea34d6f5437 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -480,7 +480,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
 		atomic64_inc(&STp->stats->write_cnt);
-		if (req->errors) {
+		if (scsi_req(req)->result) {
 			atomic64_add(atomic_read(&STp->stats->last_write_size)
 				- STp->buffer->cmdstat.residual,
 				&STp->stats->write_byte_cnt);
@@ -494,7 +494,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
 		atomic64_inc(&STp->stats->read_cnt);
-		if (req->errors) {
+		if (scsi_req(req)->result) {
 			atomic64_add(atomic_read(&STp->stats->last_read_size)
 				- STp->buffer->cmdstat.residual,
 				&STp->stats->read_byte_cnt);
@@ -518,7 +518,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate)
 	struct scsi_tape *STp = SRpnt->stp;
 	struct bio *tmp;
 
-	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
+	STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result;
 	STp->buffer->cmdstat.residual = rq->resid_len;
 
 	st_do_stats(STp, req);
@@ -579,7 +579,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	memset(rq->cmd, 0, BLK_MAX_CDB);
 	memcpy(rq->cmd, cmd, rq->cmd_len);
 	req->timeout = timeout;
-	req->retries = retries;
+	rq->retries = retries;
 	req->end_io_data = SRpnt;
 
 	blk_execute_rq_nowait(req->q, NULL, req, 1, st_scsi_execute_end);
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
index 8886458748c1..a676bccabd43 100644
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -133,13 +133,9 @@ struct lustre_sb_info {
 	struct obd_export	 *lsi_osd_exp;
 	char			  lsi_osd_type[16];
 	char			  lsi_fstype[16];
-	struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
-						* own backing_dev_info
-						*/
 };
 
 #define LSI_UMOUNT_FAILOVER	      0x00200000
-#define LSI_BDI_INITIALIZED	      0x00400000
 
 #define     s2lsi(sb)	((struct lustre_sb_info *)((sb)->s_fs_info))
 #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b229cbc7bb33..d483c44aafe5 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -863,15 +863,6 @@ void ll_lli_init(struct ll_inode_info *lli)
 	mutex_init(&lli->lli_layout_mutex);
 }
 
-static inline int ll_bdi_register(struct backing_dev_info *bdi)
-{
-	static atomic_t ll_bdi_num = ATOMIC_INIT(0);
-
-	bdi->name = "lustre";
-	return bdi_register(bdi, NULL, "lustre-%d",
-			    atomic_inc_return(&ll_bdi_num));
-}
-
 int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 {
 	struct lustre_profile *lprof = NULL;
@@ -881,6 +872,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 	char  *profilenm = get_profile_name(sb);
 	struct config_llog_instance *cfg;
 	int    err;
+	static atomic_t ll_bdi_num = ATOMIC_INIT(0);
 
 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
 
@@ -903,16 +895,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 	if (err)
 		goto out_free;
 
-	err = bdi_init(&lsi->lsi_bdi);
-	if (err)
-		goto out_free;
-	lsi->lsi_flags |= LSI_BDI_INITIALIZED;
-	lsi->lsi_bdi.capabilities = 0;
-	err = ll_bdi_register(&lsi->lsi_bdi);
+	err = super_setup_bdi_name(sb, "lustre-%d",
+				   atomic_inc_return(&ll_bdi_num));
 	if (err)
 		goto out_free;
 
-	sb->s_bdi = &lsi->lsi_bdi;
 	/* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
 	sb->s_d_op = &ll_d_ops;
 
@@ -1033,11 +1020,6 @@ void ll_put_super(struct super_block *sb)
 	if (profilenm)
 		class_del_profile(profilenm);
 
-	if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
-		bdi_destroy(&lsi->lsi_bdi);
-		lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
-	}
-
 	ll_free_sbi(sb);
 	lsi->lsi_llsbi = NULL;
 
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 344e8448869c..5798810197ec 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -167,10 +167,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
 	struct iscsi_portal_group *tpg;
 	struct iscsi_tpg_np *tpg_np;
 	char *str, *str2, *ip_str, *port_str;
-	struct sockaddr_storage sockaddr;
-	struct sockaddr_in *sock_in;
-	struct sockaddr_in6 *sock_in6;
-	unsigned long port;
+	struct sockaddr_storage sockaddr = { };
 	int ret;
 	char buf[MAX_PORTAL_LEN + 1];
 
@@ -182,21 +179,19 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
 	memset(buf, 0, MAX_PORTAL_LEN + 1);
 	snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
 
-	memset(&sockaddr, 0, sizeof(struct sockaddr_storage));
-
 	str = strstr(buf, "[");
 	if (str) {
-		const char *end;
-
 		str2 = strstr(str, "]");
 		if (!str2) {
 			pr_err("Unable to locate trailing \"]\""
 				" in IPv6 iSCSI network portal address\n");
 			return ERR_PTR(-EINVAL);
 		}
-		str++; /* Skip over leading "[" */
+
+		ip_str = str + 1; /* Skip over leading "[" */
 		*str2 = '\0'; /* Terminate the unbracketed IPv6 address */
 		str2++; /* Skip over the \0 */
+
 		port_str = strstr(str2, ":");
 		if (!port_str) {
 			pr_err("Unable to locate \":port\""
@@ -205,23 +200,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
 		}
 		*port_str = '\0'; /* Terminate string for IP */
 		port_str++; /* Skip over ":" */
-
-		ret = kstrtoul(port_str, 0, &port);
-		if (ret < 0) {
-			pr_err("kstrtoul() failed for port_str: %d\n", ret);
-			return ERR_PTR(ret);
-		}
-		sock_in6 = (struct sockaddr_in6 *)&sockaddr;
-		sock_in6->sin6_family = AF_INET6;
-		sock_in6->sin6_port = htons((unsigned short)port);
-		ret = in6_pton(str, -1,
-				(void *)&sock_in6->sin6_addr.in6_u, -1, &end);
-		if (ret <= 0) {
-			pr_err("in6_pton returned: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
 	} else {
-		str = ip_str = &buf[0];
+		ip_str = &buf[0];
 		port_str = strstr(ip_str, ":");
 		if (!port_str) {
 			pr_err("Unable to locate \":port\""
@@ -230,17 +210,15 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
 		}
 		*port_str = '\0'; /* Terminate string for IP */
 		port_str++; /* Skip over ":" */
+	}
 
-		ret = kstrtoul(port_str, 0, &port);
-		if (ret < 0) {
-			pr_err("kstrtoul() failed for port_str: %d\n", ret);
-			return ERR_PTR(ret);
-		}
-		sock_in = (struct sockaddr_in *)&sockaddr;
-		sock_in->sin_family = AF_INET;
-		sock_in->sin_port = htons((unsigned short)port);
-		sock_in->sin_addr.s_addr = in_aton(ip_str);
+	ret = inet_pton_with_scope(&init_net, AF_UNSPEC, ip_str,
+			port_str, &sockaddr);
+	if (ret) {
+		pr_err("malformed ip/port passed: %s\n", name);
+		return ERR_PTR(ret);
 	}
+
 	tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
 	ret = iscsit_get_tpg(tpg);
 	if (ret < 0)
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c754ae33bf7b..d2f089cfa9ae 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -851,7 +851,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 	attrib->unmap_granularity = q->limits.discard_granularity / block_size;
 	attrib->unmap_granularity_alignment = q->limits.discard_alignment /
 								block_size;
-	attrib->unmap_zeroes_data = q->limits.discard_zeroes_data;
+	attrib->unmap_zeroes_data = 0;
 	return true;
 }
 EXPORT_SYMBOL(target_configure_unmap_from_queue);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 94cda7991e80..a93d94e68ab5 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1008,7 +1008,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		req->timeout = PS_TIMEOUT_DISK;
 	else
 		req->timeout = PS_TIMEOUT_OTHER;
-	req->retries = PS_RETRY;
+	scsi_req(req)->retries = PS_RETRY;
 
 	blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req,
 			(cmd->sam_task_attr == TCM_HEAD_TAG),
@@ -1050,7 +1050,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
 
-	pt->pscsi_result = req->errors;
+	pt->pscsi_result = scsi_req(req)->result;
 	pt->pscsi_resid = scsi_req(req)->resid_len;
 
 	cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b34396144..0a16cf4bed39 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -291,18 +291,6 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
-config DB8500_CPUFREQ_COOLING
-	tristate "DB8500 cpufreq cooling"
-	depends on ARCH_U8500 || COMPILE_TEST
-	depends on HAS_IOMEM
-	depends on CPU_THERMAL
-	default y
-	help
-	  Adds DB8500 cpufreq cooling devices, and these cooling devices can be
-	  bound to thermal zone trip points. When a trip point reached, the
-	  bound cpufreq cooling device turns active to set CPU frequency low to
-	  cool down the CPU.
-
 config INTEL_POWERCLAMP
 	tristate "Intel PowerClamp idle injection driver"
 	depends on THERMAL
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 7adae2029355..c2372f10dae5 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_TANGO_THERMAL)	+= tango_thermal.o
 obj-$(CONFIG_IMX_THERMAL)	+= imx_thermal.o
 obj-$(CONFIG_MAX77620_THERMAL)	+= max77620_thermal.o
 obj-$(CONFIG_QORIQ_THERMAL)	+= qoriq_thermal.o
-obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
 obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE)	+= intel_soc_dts_iosf.o
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
deleted file mode 100644
index e58bd0b658b5..000000000000
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device.
- *
- * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
- *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/cpu_cooling.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev;
-
-	cdev = cpufreq_cooling_register(cpu_present_mask);
-	if (IS_ERR(cdev)) {
-		int ret = PTR_ERR(cdev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"Failed to register cooling device %d\n",
-				ret);
-				
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, cdev);
-
-	dev_info(&pdev->dev, "Cooling device registered: %s\n",	cdev->type);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_remove(struct platform_device *pdev)
-{
-	struct thermal_cooling_device *cdev = platform_get_drvdata(pdev);
-
-	cpufreq_cooling_unregister(cdev);
-
-	return 0;
-}
-
-static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev,
-		pm_message_t state)
-{
-	return -ENOSYS;
-}
-
-static int db8500_cpufreq_cooling_resume(struct platform_device *pdev)
-{
-	return -ENOSYS;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id db8500_cpufreq_cooling_match[] = {
-	{ .compatible = "stericsson,db8500-cpufreq-cooling" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match);
-#endif
-
-static struct platform_driver db8500_cpufreq_cooling_driver = {
-	.driver = {
-		.name = "db8500-cpufreq-cooling",
-		.of_match_table = of_match_ptr(db8500_cpufreq_cooling_match),
-	},
-	.probe = db8500_cpufreq_cooling_probe,
-	.suspend = db8500_cpufreq_cooling_suspend,
-	.resume = db8500_cpufreq_cooling_resume,
-	.remove = db8500_cpufreq_cooling_remove,
-};
-
-static int __init db8500_cpufreq_cooling_init(void)
-{
-	return platform_driver_register(&db8500_cpufreq_cooling_driver);
-}
-
-static void __exit db8500_cpufreq_cooling_exit(void)
-{
-	platform_driver_unregister(&db8500_cpufreq_cooling_driver);
-}
-
-/* Should be later than db8500_cpufreq_register */
-late_initcall(db8500_cpufreq_cooling_init);
-module_exit(db8500_cpufreq_cooling_exit);
-
-MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>");
-MODULE_DESCRIPTION("DB8500 cpufreq cooling driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index d7efcb632f7d..002f1ce22bd0 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -297,14 +297,15 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	}
 
 	/*
-	 * If the GPIO is configured as input, change the direction to output
-	 * and set the GPIO as active.
+	 * If the GPIO is not known to be already configured as output, that
+	 * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL,
+	 * change the direction to output and set the GPIO as active.
 	 * Do not force the GPIO to active when it was already output as it
 	 * could cause backlight flickering or we would enable the backlight too
 	 * early. Leave the decision of the initial backlight state for later.
 	 */
 	if (pb->enable_gpio &&
-	    gpiod_get_direction(pb->enable_gpio) == GPIOF_DIR_IN)
+	    gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT)
 		gpiod_direction_output(pb->enable_gpio, 1);
 
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");