110 files changed, 3268 insertions, 2119 deletions
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 21932d640a41..a1d177d58254 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -108,9 +108,7 @@ acpi_ex_add_table(u32 table_index,
 
 	/* Add the table to the namespace */
 
-	acpi_ex_exit_interpreter();
 	status = acpi_ns_load_table(table_index, parent_node);
-	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 		acpi_ut_remove_reference(obj_desc);
 		*ddb_handle = NULL;
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 297f6aacd7d4..b5e2b0ada0ab 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -46,7 +46,6 @@
 #include "acnamesp.h"
 #include "acdispat.h"
 #include "actables.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsload")
@@ -79,8 +78,6 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 
 	ACPI_FUNCTION_TRACE(ns_load_table);
 
-	acpi_ex_enter_interpreter();
-
 	/*
 	 * Parse the table and load the namespace with all named
 	 * objects found within. Control methods are NOT parsed
@@ -92,7 +89,7 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 	 */
 	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
 	if (ACPI_FAILURE(status)) {
-		goto unlock_interp;
+		return_ACPI_STATUS(status);
 	}
 
 	/* If table already loaded into namespace, just return */
@@ -133,8 +130,6 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 
 unlock:
 	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-unlock_interp:
-	(void)acpi_ex_exit_interpreter();
 
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 73c76d646064..290d6f5be44b 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1331,8 +1331,6 @@ static int ec_install_handlers(struct acpi_ec *ec)
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
-	acpi_ec_stop(ec, false);
-
 	if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
 		if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
 					ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
@@ -1340,6 +1338,19 @@ static void ec_remove_handlers(struct acpi_ec *ec)
 		clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
 	}
 
+	/*
+	 * Stops handling the EC transactions after removing the operation
+	 * region handler. This is required because _REG(DISCONNECT)
+	 * invoked during the removal can result in new EC transactions.
+	 *
+	 * Flushes the EC requests and thus disables the GPE before
+	 * removing the GPE handler. This is required by the current ACPICA
+	 * GPE core. ACPICA GPE core will automatically disable a GPE when
+	 * it is indicated but there is no way to handle it. So the drivers
+	 * must disable the GPEs prior to removing the GPE handlers.
+	 */
+	acpi_ec_stop(ec, false);
+
 	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
 		if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
 					&acpi_ec_gpe_handler)))
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index ac6ddcc080d4..1f0e06065ae6 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 
 	/*
 	 * Until standardization materializes we need to consider up to 3
-	 * different command sets.  Note, that checking for zero functions
-	 * tells us if any commands might be reachable through this uuid.
+	 * different command sets.  Note, that checking for function0 (bit0)
+	 * tells us if any commands are reachable through this uuid.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0))
+		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
 			break;
 
 	/* limit the supported commands to those that are publicly documented */
@@ -1151,9 +1151,10 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		if (disable_vendor_specific)
 			dsm_mask &= ~(1 << 8);
 	} else {
-		dev_err(dev, "unknown dimm command family\n");
+		dev_dbg(dev, "unknown dimm command family\n");
 		nfit_mem->family = -1;
-		return force_enable_dimms ? 0 : -ENODEV;
+		/* DSMs are optional, continue loading the driver... */
+		return 0;
 	}
 
 	uuid = to_nfit_uuid(nfit_mem->family);
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index b4de130f2d57..22c09952e177 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -680,6 +680,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	u64 mask = 0;
 	union acpi_object *obj;
 
+	if (funcs == 0)
+		return false;
+
 	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
 	if (!obj)
 		return false;
@@ -692,9 +695,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
-	if (funcs == 0)
-		return true;
-
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
 	 * function 0 for the specified UUID and revision.
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 6be7770f68e9..31c183aed368 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4314,6 +4314,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	 */
 	{ "ST380013AS",		"3.20",		ATA_HORKAGE_MAX_SEC_1024 },
 
+	/*
+	 * Device times out with higher max sects.
+	 * https://bugzilla.kernel.org/show_bug.cgi?id=121671
+	 */
+	{ "LITEON CX1-JB256-HP", NULL,		ATA_HORKAGE_MAX_SEC_1024 },
+
 	/* Devices we expect to fail diagnostics */
 
 	/* Devices where NCQ should be avoided */
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index eda09090cb52..f642c4264c27 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -8,8 +8,6 @@
 #include <linux/bcma/bcma.h>
 #include <linux/delay.h>
 
-#define BCMA_CORE_SIZE		0x1000
-
 #define bcma_err(bus, fmt, ...) \
 	pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__)
 #define bcma_warn(bus, fmt, ...) \
diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 10f846cc8db1..25d5906640c3 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c
@@ -99,7 +99,7 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index)
 	struct clk_programmable *prog = to_clk_programmable(hw);
 	const struct clk_programmable_layout *layout = prog->layout;
 	unsigned int mask = layout->css_mask;
-	unsigned int pckr = 0;
+	unsigned int pckr = index;
 
 	if (layout->have_slck_mck)
 		mask |= AT91_PMC_CSSMCK_MCK;
diff --git a/drivers/clk/sunxi/clk-sun4i-display.c b/drivers/clk/sunxi/clk-sun4i-display.c
index 445a7498d6df..9780fac6d029 100644
--- a/drivers/clk/sunxi/clk-sun4i-display.c
+++ b/drivers/clk/sunxi/clk-sun4i-display.c
@@ -33,6 +33,8 @@ struct sun4i_a10_display_clk_data {
 
 	u8	width_div;
 	u8	width_mux;
+
+	u32	flags;
 };
 
 struct reset_data {
@@ -166,7 +168,7 @@ static void __init sun4i_a10_display_init(struct device_node *node,
 				     data->has_div ? &div->hw : NULL,
 				     data->has_div ? &clk_divider_ops : NULL,
 				     &gate->hw, &clk_gate_ops,
-				     0);
+				     data->flags);
 	if (IS_ERR(clk)) {
 		pr_err("%s: Couldn't register the clock\n", clk_name);
 		goto free_div;
@@ -232,6 +234,7 @@ static const struct sun4i_a10_display_clk_data sun4i_a10_tcon_ch0_data __initcon
 	.offset_rst	= 29,
 	.offset_mux	= 24,
 	.width_mux	= 2,
+	.flags		= CLK_SET_RATE_PARENT,
 };
 
 static void __init sun4i_a10_tcon_ch0_setup(struct device_node *node)
diff --git a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
index 98a4582de56a..b6d29d1bedca 100644
--- a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
+++ b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
@@ -79,15 +79,11 @@ static int tcon_ch1_is_enabled(struct clk_hw *hw)
 static u8 tcon_ch1_get_parent(struct clk_hw *hw)
 {
 	struct tcon_ch1_clk *tclk = hw_to_tclk(hw);
-	int num_parents = clk_hw_get_num_parents(hw);
 	u32 reg;
 
 	reg = readl(tclk->reg) >> TCON_CH1_SCLK2_MUX_SHIFT;
 	reg &= reg >> TCON_CH1_SCLK2_MUX_MASK;
 
-	if (reg >= num_parents)
-		return -EINVAL;
-
 	return reg;
 }
 
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 6d74b91f2152..5fc3dbb9ada0 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -2,6 +2,7 @@ $(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \
 			     $(obj)/qat_rsapubkey-asn1.h
 $(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \
 			      $(obj)/qat_rsaprivkey-asn1.h
+$(obj)/qat_asym_algs.o: $(obj)/qat_rsapubkey-asn1.h $(obj)/qat_rsaprivkey-asn1.h
 
 clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h
 clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 6744d88bdea8..4fb2eb7c800d 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -2378,22 +2378,19 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
  * @num_mc: pointer to the memory controllers count, to be incremented in case
  *	    of success.
  * @table: model specific table
- * @allow_dups: allow for multiple devices to exist with the same device id
- *              (as implemented, this isn't expected to work correctly in the
- *              multi-socket case).
- * @multi_bus: don't assume devices on different buses belong to different
- *             memory controllers.
  *
  * returns 0 in case of success or error code
  */
-static int sbridge_get_all_devices_full(u8 *num_mc,
-					const struct pci_id_table *table,
-					int allow_dups,
-					int multi_bus)
+static int sbridge_get_all_devices(u8 *num_mc,
+					const struct pci_id_table *table)
 {
 	int i, rc;
 	struct pci_dev *pdev = NULL;
+	int allow_dups = 0;
+	int multi_bus = 0;
 
+	if (table->type == KNIGHTS_LANDING)
+		allow_dups = multi_bus = 1;
 	while (table && table->descr) {
 		for (i = 0; i < table->n_devs; i++) {
 			if (!allow_dups || i == 0 ||
@@ -2420,11 +2417,6 @@ static int sbridge_get_all_devices_full(u8 *num_mc,
 	return 0;
 }
 
-#define sbridge_get_all_devices(num_mc, table) \
-		sbridge_get_all_devices_full(num_mc, table, 0, 0)
-#define sbridge_get_all_devices_knl(num_mc, table) \
-		sbridge_get_all_devices_full(num_mc, table, 1, 1)
-
 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 				 struct sbridge_dev *sbridge_dev)
 {
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 536112fd2466..d7860614f87f 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -402,9 +402,12 @@ config GPIO_TB10X
 	select OF_GPIO
 
 config GPIO_TEGRA
-	bool
-	default y
+	bool "NVIDIA Tegra GPIO support"
+	default ARCH_TEGRA
 	depends on ARCH_TEGRA || COMPILE_TEST
+	depends on OF
+	help
+	  Say yes here to support GPIO pins on NVIDIA Tegra SoCs.
 
 config GPIO_TS4800
 	tristate "TS-4800 DIO blocks and compatibles"
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 13cdb01e9b45..bc56c8a181e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -156,3 +156,18 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)
+{
+	PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+
+	args.ucRegIndex = offset;
+	args.lpI2CDataOut = data;
+	args.ucFlag = 1;
+	args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+	args.ucTransBytes = 1;
+	args.ucSlaveAddr = slave_addr;
+	args.ucLineNumber = line_number;
+
+	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
index d6128d9de56e..251aaf41f65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
@@ -27,5 +27,7 @@
 int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
 		      struct i2c_msg *msgs, int num);
 u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap);
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev,
+		u8 slave_addr, u8 line_number, u8 offset, u8 data);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index b2ebd4fef6cf..c2ef94511f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -28,6 +28,7 @@
 #include "vid.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
 #include "clearstate_vi.h"
 
 #include "gmc/gmc_8_2_d.h"
@@ -284,6 +285,7 @@ static const u32 golden_settings_polaris11_a11[] =
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris11_golden_common_all[] =
@@ -314,6 +316,7 @@ static const u32 golden_settings_polaris10_a11[] =
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris10_golden_common_all[] =
@@ -696,6 +699,10 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 						 polaris10_golden_common_all,
 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
+		if (adev->pdev->revision == 0xc7) {
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
+		}
 		break;
 	case CHIP_CARRIZO:
 		amdgpu_program_register_sequence(adev,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f313b4d8344f..85c4debf47e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -512,6 +512,10 @@ void intel_detect_pch(struct drm_device *dev)
 				DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
 				WARN_ON(!IS_SKYLAKE(dev) &&
 					!IS_KABYLAKE(dev));
+			} else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) {
+				dev_priv->pch_type = PCH_KBP;
+				DRM_DEBUG_KMS("Found KabyPoint PCH\n");
+				WARN_ON(!IS_KABYLAKE(dev));
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
 				   (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
 				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c334e902266..bc3f2e6842e7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -990,6 +990,7 @@ enum intel_pch {
 	PCH_CPT,	/* Cougarpoint PCH */
 	PCH_LPT,	/* Lynxpoint PCH */
 	PCH_SPT,        /* Sunrisepoint PCH */
+	PCH_KBP,        /* Kabypoint PCH */
 	PCH_NOP,
 };
 
@@ -2600,6 +2601,15 @@ struct drm_i915_cmd_table {
 
 #define IS_BXT_REVID(p, since, until) (IS_BROXTON(p) && IS_REVID(p, since, until))
 
+#define KBL_REVID_A0		0x0
+#define KBL_REVID_B0		0x1
+#define KBL_REVID_C0		0x2
+#define KBL_REVID_D0		0x3
+#define KBL_REVID_E0		0x4
+
+#define IS_KBL_REVID(p, since, until) \
+	(IS_KABYLAKE(p) && IS_REVID(p, since, until))
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
@@ -2708,11 +2718,13 @@ struct drm_i915_cmd_table {
 #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE		0x9c00
 #define INTEL_PCH_SPT_DEVICE_ID_TYPE		0xA100
 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE		0x9D00
+#define INTEL_PCH_KBP_DEVICE_ID_TYPE		0xA200
 #define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
 #define INTEL_PCH_P3X_DEVICE_ID_TYPE		0x7000
 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
 
 #define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type)
+#define HAS_PCH_KBP(dev) (INTEL_PCH_TYPE(dev) == PCH_KBP)
 #define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT)
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_LPT_LP(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 425e721aac58..66571466e9a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 	if (!mutex_is_locked(mutex))
 		return false;
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
 	return mutex->owner == task;
 #else
 	/* Since UP may be pre-empted, we cannot assume that we own the lock */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index b7ce963fb8f8..44004e3f09e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -55,8 +55,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
 		return -ENODEV;
 
 	/* See the comment at the drm_mm_init() call for more about this check.
-	 * WaSkipStolenMemoryFirstPage:bdw,chv (incomplete) */
-	if (INTEL_INFO(dev_priv)->gen == 8 && start < 4096)
+	 * WaSkipStolenMemoryFirstPage:bdw,chv,kbl (incomplete)
+	 */
+	if (start < 4096 && (IS_GEN8(dev_priv) ||
+			     IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)))
 		start = 4096;
 
 	mutex_lock(&dev_priv->mm.stolen_lock);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2f6fd33c07ba..aab47f7bb61b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2471,7 +2471,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 			I915_WRITE(SDEIIR, iir);
 			ret = IRQ_HANDLED;
 
-			if (HAS_PCH_SPT(dev_priv))
+			if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv))
 				spt_irq_handler(dev, iir);
 			else
 				cpt_irq_handler(dev, iir);
@@ -4661,7 +4661,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 		dev->driver->disable_vblank = gen8_disable_vblank;
 		if (IS_BROXTON(dev))
 			dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
-		else if (HAS_PCH_SPT(dev))
+		else if (HAS_PCH_SPT(dev) || HAS_PCH_KBP(dev))
 			dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
 		else
 			dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b407411e31ba..3fcf7dd5b6ca 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -220,6 +220,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
 #define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
+#define GEN8_CONFIG0			_MMIO(0xD00)
+#define  GEN9_DEFAULT_FIXES		(1 << 3 | 1 << 2 | 1 << 1)
+
 #define GAC_ECO_BITS			_MMIO(0x14090)
 #define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
@@ -1669,6 +1672,9 @@ enum skl_disp_power_wells {
 
 #define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
 
+#define GAMT_CHKN_BIT_REG	_MMIO(0x4ab8)
+#define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1<<28)
+
 #if 0
 #define PRB0_TAIL	_MMIO(0x2030)
 #define PRB0_HEAD	_MMIO(0x2034)
@@ -1804,6 +1810,10 @@ enum skl_disp_power_wells {
 #define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
 
+/* chicken reg for WaConextSwitchWithConcurrentTLBInvalidate */
+#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20D4)
+#define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
+
 /* WaClearTdlStateAckDirtyBits */
 #define GEN8_STATE_ACK		_MMIO(0x20F0)
 #define GEN9_STATE_ACK_SLICE1	_MMIO(0x20F8)
@@ -2200,6 +2210,8 @@ enum skl_disp_power_wells {
 #define ILK_DPFC_STATUS		_MMIO(0x43210)
 #define ILK_DPFC_FENCE_YOFF	_MMIO(0x43218)
 #define ILK_DPFC_CHICKEN	_MMIO(0x43224)
+#define   ILK_DPFC_DISABLE_DUMMY0 (1<<8)
+#define   ILK_DPFC_NUKE_ON_ANY_MODIFICATION	(1<<23)
 #define ILK_FBC_RT_BASE		_MMIO(0x2128)
 #define   ILK_FBC_RT_VALID	(1<<0)
 #define   SNB_FBC_FRONT_BUFFER	(1<<1)
@@ -6031,6 +6043,7 @@ enum skl_disp_power_wells {
 #define CHICKEN_PAR1_1		_MMIO(0x42080)
 #define  DPA_MASK_VBLANK_SRD	(1 << 15)
 #define  FORCE_ARB_IDLE_PLANES	(1 << 14)
+#define  SKL_EDP_PSR_FIX_RDWRAP	(1 << 3)
 
 #define _CHICKEN_PIPESL_1_A	0x420b0
 #define _CHICKEN_PIPESL_1_B	0x420b4
@@ -6039,6 +6052,7 @@ enum skl_disp_power_wells {
 #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
 
 #define DISP_ARB_CTL	_MMIO(0x45000)
+#define  DISP_FBC_MEMORY_WAKE		(1<<31)
 #define  DISP_TILE_SURFACE_SWIZZLING	(1<<13)
 #define  DISP_FBC_WM_DIS		(1<<15)
 #define DISP_ARB_CTL2	_MMIO(0x45004)
@@ -6052,6 +6066,9 @@ enum skl_disp_power_wells {
 #define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1<<4)
 
+#define GEN8_CHICKEN_DCPR_1		_MMIO(0x46430)
+#define   MASK_WAKEMEM			(1<<13)
+
 #define SKL_DFSM			_MMIO(0x51000)
 #define SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
@@ -6069,6 +6086,7 @@ enum skl_disp_power_wells {
 #define  GEN9_TSG_BARRIER_ACK_DISABLE		(1<<8)
 
 #define GEN9_CS_DEBUG_MODE1		_MMIO(0x20ec)
+#define GEN9_CTX_PREEMPT_REG		_MMIO(0x2248)
 #define GEN8_CS_CHICKEN1		_MMIO(0x2580)
 
 /* GEN7 chicken */
@@ -6076,6 +6094,7 @@ enum skl_disp_power_wells {
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	((1<<10) | (1<<26))
 # define GEN9_RHWO_OPTIMIZATION_DISABLE		(1<<14)
 #define COMMON_SLICE_CHICKEN2			_MMIO(0x7014)
+# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1<<0)
 
 #define HIZ_CHICKEN					_MMIO(0x7018)
@@ -6921,6 +6940,7 @@ enum skl_disp_power_wells {
 #define    EDRAM_SETS_IDX(cap)			(((cap) >> 8) & 0x3)
 
 #define GEN6_UCGCTL1				_MMIO(0x9400)
+# define GEN6_GAMUNIT_CLOCK_GATE_DISABLE		(1 << 22)
 # define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
@@ -6937,6 +6957,7 @@ enum skl_disp_power_wells {
 
 #define GEN7_UCGCTL4				_MMIO(0x940c)
 #define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1<<25)
+#define  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE	(1<<14)
 
 #define GEN6_RCGCTL1				_MMIO(0x9410)
 #define GEN6_RCGCTL2				_MMIO(0x9414)
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index a34c23eceba0..2b3b428d9cd2 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -41,16 +41,22 @@
  * be moved to FW_FAILED.
  */
 
+#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
+MODULE_FIRMWARE(I915_CSR_KBL);
+#define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
+
 #define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
+MODULE_FIRMWARE(I915_CSR_SKL);
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
+
 #define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
+MODULE_FIRMWARE(I915_CSR_BXT);
+#define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
 #define FIRMWARE_URL  "https://01.org/linuxgraphics/intel-linux-graphics-firmwares"
 
-MODULE_FIRMWARE(I915_CSR_SKL);
-MODULE_FIRMWARE(I915_CSR_BXT);
 
-#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
-#define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
+
 
 #define CSR_MAX_FW_SIZE			0x2FFF
 #define CSR_DEFAULT_FW_OFFSET		0xFFFFFFFF
@@ -169,12 +175,10 @@ struct stepping_info {
 	char substepping;
 };
 
-/*
- * Kabylake derivated from Skylake H0, so SKL H0
- * is the right firmware for KBL A0 (revid 0).
- */
 static const struct stepping_info kbl_stepping_info[] = {
-	{'H', '0'}, {'I', '0'}
+	{'A', '0'}, {'B', '0'}, {'C', '0'},
+	{'D', '0'}, {'E', '0'}, {'F', '0'},
+	{'G', '0'}, {'H', '0'}, {'I', '0'},
 };
 
 static const struct stepping_info skl_stepping_info[] = {
@@ -298,7 +302,9 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
 
 	csr->version = css_header->version;
 
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+	if (IS_KABYLAKE(dev_priv)) {
+		required_min_version = KBL_CSR_VERSION_REQUIRED;
+	} else if (IS_SKYLAKE(dev_priv)) {
 		required_min_version = SKL_CSR_VERSION_REQUIRED;
 	} else if (IS_BROXTON(dev_priv)) {
 		required_min_version = BXT_CSR_VERSION_REQUIRED;
@@ -446,7 +452,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 	if (!HAS_CSR(dev_priv))
 		return;
 
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+	if (IS_KABYLAKE(dev_priv))
+		csr->fw_path = I915_CSR_KBL;
+	else if (IS_SKYLAKE(dev_priv))
 		csr->fw_path = I915_CSR_SKL;
 	else if (IS_BROXTON(dev_priv))
 		csr->fw_path = I915_CSR_BXT;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04452cf3eae8..3074c56a643d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11997,6 +11997,12 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 		ret = intel_color_check(crtc, crtc_state);
 		if (ret)
 			return ret;
+
+		/*
+		 * Changing color management on Intel hardware is
+		 * handled as part of planes update.
+		 */
+		crtc_state->planes_changed = true;
 	}
 
 	ret = 0;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 40745e38d438..891107f92d9f 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4645,7 +4645,7 @@ intel_dp_detect(struct drm_connector *connector, bool force)
 
 	intel_dp->detect_done = false;
 
-	if (intel_connector->detect_edid)
+	if (is_edp(intel_dp) || intel_connector->detect_edid)
 		return connector_status_connected;
 	else
 		return connector_status_disconnected;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 42eac37de047..7f2d8415ed8b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1103,15 +1103,17 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 						uint32_t *const batch,
 						uint32_t index)
 {
+	struct drm_i915_private *dev_priv = engine->dev->dev_private;
 	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	/*
-	 * WaDisableLSQCROPERFforOCL:skl
+	 * WaDisableLSQCROPERFforOCL:skl,kbl
 	 * This WA is implemented in skl_init_clock_gating() but since
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKL_REVID(engine->dev, 0, SKL_REVID_E0))
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
+	    IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
@@ -1273,6 +1275,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 {
 	int ret;
 	struct drm_device *dev = engine->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
@@ -1286,6 +1289,22 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 		return ret;
 	index = ret;
 
+	/* WaClearSlmSpaceAtContextSwitch:kbl */
+	/* Actual scratch location is at 128 bytes offset */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
+		uint32_t scratch_addr
+			= engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+
+		wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
+		wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
+					   PIPE_CONTROL_GLOBAL_GTT_IVB |
+					   PIPE_CONTROL_CS_STALL |
+					   PIPE_CONTROL_QW_WRITE));
+		wa_ctx_emit(batch, index, scratch_addr);
+		wa_ctx_emit(batch, index, 0);
+		wa_ctx_emit(batch, index, 0);
+		wa_ctx_emit(batch, index, 0);
+	}
 	/* Pad to end of cacheline */
 	while (index % CACHELINE_DWORDS)
 		wa_ctx_emit(batch, index, MI_NOOP);
@@ -1687,9 +1706,10 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
 	struct intel_engine_cs *engine = ringbuf->engine;
 	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	bool vf_flush_wa = false;
+	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 flags = 0;
 	int ret;
+	int len;
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
@@ -1716,9 +1736,21 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 		 */
 		if (IS_GEN9(engine->dev))
 			vf_flush_wa = true;
+
+		/* WaForGAMHang:kbl */
+		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
+			dc_flush_wa = true;
 	}
 
-	ret = intel_ring_begin(request, vf_flush_wa ? 12 : 6);
+	len = 6;
+
+	if (vf_flush_wa)
+		len += 6;
+
+	if (dc_flush_wa)
+		len += 12;
+
+	ret = intel_ring_begin(request, len);
 	if (ret)
 		return ret;
 
@@ -1731,12 +1763,31 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 		intel_logical_ring_emit(ringbuf, 0);
 	}
 
+	if (dc_flush_wa) {
+		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+		intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+	}
+
 	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
 	intel_logical_ring_emit(ringbuf, flags);
 	intel_logical_ring_emit(ringbuf, scratch_addr);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, 0);
+
+	if (dc_flush_wa) {
+		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+		intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+	}
+
 	intel_logical_ring_advance(ringbuf);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 99e26034ae8d..16e209d326b6 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -1038,5 +1038,16 @@ intel_opregion_get_panel_type(struct drm_device *dev)
 		return -ENODEV;
 	}
 
+	/*
+	 * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us
+	 * low vswing for eDP, whereas the VBT panel type (2) gives us normal
+	 * vswing instead. Low vswing results in some display flickers, so
+	 * let's simply ignore the OpRegion panel type on SKL for now.
+	 */
+	if (IS_SKYLAKE(dev)) {
+		DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1);
+		return -ENODEV;
+	}
+
 	return ret - 1;
 }
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 8357d571553a..aba94099886b 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1731,7 +1731,8 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel)
 		panel->backlight.set = bxt_set_backlight;
 		panel->backlight.get = bxt_get_backlight;
 		panel->backlight.hz_to_pwm = bxt_hz_to_pwm;
-	} else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_SPT(dev_priv)) {
+	} else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_SPT(dev_priv) ||
+		   HAS_PCH_KBP(dev_priv)) {
 		panel->backlight.setup = lpt_setup_backlight;
 		panel->backlight.enable = lpt_enable_backlight;
 		panel->backlight.disable = lpt_disable_backlight;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a7ef45da0a9e..2863b92c9da6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -54,10 +54,38 @@
 #define INTEL_RC6p_ENABLE			(1<<1)
 #define INTEL_RC6pp_ENABLE			(1<<2)
 
+static void gen9_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
+	I915_WRITE(CHICKEN_PAR1_1,
+		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+
+	I915_WRITE(GEN8_CONFIG0,
+		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
+
+	/* WaEnableChickenDCPR:skl,bxt,kbl */
+	I915_WRITE(GEN8_CHICKEN_DCPR_1,
+		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+
+	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
+	/* WaFbcWakeMemOn:skl,bxt,kbl */
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+		   DISP_FBC_WM_DIS |
+		   DISP_FBC_MEMORY_WAKE);
+
+	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_DISABLE_DUMMY0);
+}
+
 static void bxt_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	gen9_init_clock_gating(dev);
+
 	/* WaDisableSDEUnitClockGating:bxt */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
@@ -6698,6 +6726,38 @@ static void lpt_suspend_hw(struct drm_device *dev)
 	}
 }
 
+static void kabylake_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	gen9_init_clock_gating(dev);
+
+	/* WaDisableSDEUnitClockGating:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableGamClockGating:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+			   GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaFbcNukeOnHostModify:kbl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+}
+
+static void skylake_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	gen9_init_clock_gating(dev);
+
+	/* WaFbcNukeOnHostModify:skl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+}
+
 static void broadwell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -7163,9 +7223,9 @@ static void nop_init_clock_gating(struct drm_device *dev)
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
 	if (IS_SKYLAKE(dev_priv))
-		dev_priv->display.init_clock_gating = nop_init_clock_gating;
+		dev_priv->display.init_clock_gating = skylake_init_clock_gating;
 	else if (IS_KABYLAKE(dev_priv))
-		dev_priv->display.init_clock_gating = nop_init_clock_gating;
+		dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
 	else if (IS_BROXTON(dev_priv))
 		dev_priv->display.init_clock_gating = bxt_init_clock_gating;
 	else if (IS_BROADWELL(dev_priv))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04402bb9d26b..68c5af079ef8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -913,24 +913,26 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 {
 	struct drm_device *dev = engine->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
 	int ret;
 
-	/* WaEnableLbsSlaRetryTimerDecrement:skl */
+	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */
+	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-	/* WaDisableKillLogic:bxt,skl */
+	/* WaDisableKillLogic:bxt,skl,kbl */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 		   ECOCHK_DIS_TLB);
 
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt */
-	/* WaDisablePartialInstShootdown:skl,bxt */
+	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */
+	/* WaDisablePartialInstShootdown:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 			  FLOW_CONTROL_ENABLE |
 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
-	/* Syncing dependencies between camera and graphics:skl,bxt */
+	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
@@ -952,18 +954,18 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 		 */
 	}
 
-	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt */
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
+	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 			  GEN9_ENABLE_YV12_BUGFIX |
 			  GEN9_ENABLE_GPGPU_PREEMPTION);
 
-	/* Wa4x4STCOptimizationDisable:skl,bxt */
-	/* WaDisablePartialResolveInVc:skl,bxt */
+	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
+	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 
-	/* WaCcsTlbPrefetchDisable:skl,bxt */
+	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl */
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
@@ -973,31 +975,57 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 				  PIXEL_MASK_CAMMING_DISABLE);
 
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
-	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-	if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) ||
-	    IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
-		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
-	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+	 * both tied to WaForceContextSaveRestoreNonCoherent
+	 * in some hsds for skl. We keep the tie for all gen9. The
+	 * documentation is a bit hazy and so we want to get common behaviour,
+	 * even though there is no clear evidence we would need both on kbl/bxt.
+	 * This area has been source of system hangs so we play it safe
+	 * and mimic the skl regardless of what bspec says.
+	 *
+	 * Use Force Non-Coherent whenever executing a 3D context. This
+	 * is a workaround for a possible hang in the unlikely event
+	 * a TLB invalidation occurs during a PSD flush.
+	 */
 
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-	if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
+	/* WaForceEnableNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT);
+
+	/* WaDisableHDCInvalidation:skl,bxt,kbl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
+
+	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-	/* WaDisableSTUnitPowerOptimization:skl,bxt */
+	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
-	/* WaOCLCoherentLineFlush:skl,bxt */
+	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
 
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
+	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */
+	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
+	if (ret)
+		return ret;
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
 	ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
 	if (ret)
 		return ret;
 
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt */
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
 	if (ret)
 		return ret;
@@ -1092,22 +1120,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	/* This is tied to WaForceContextSaveRestoreNonCoherent */
-	if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) {
-		/*
-		 *Use Force Non-Coherent whenever executing a 3D context. This
-		 * is a workaround for a possible hang in the unlikely event
-		 * a TLB invalidation occurs during a PSD flush.
-		 */
-		/* WaForceEnableNonCoherent:skl */
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FORCE_NON_COHERENT);
-
-		/* WaDisableHDCInvalidation:skl */
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   BDW_DISABLE_HDC_INVALIDATION);
-	}
-
 	/* WaBarrierPerformanceFixDisable:skl */
 	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
@@ -1120,6 +1132,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
+	/* WaDisableGafsUnitClkGating:skl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
 	/* WaDisableLSQCROPERFforOCL:skl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
 	if (ret)
@@ -1174,6 +1189,63 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
 			return ret;
 	}
 
+	/* WaInsertDummyPushConstPs:bxt */
+	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int kbl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:kbl */
+	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT(GAMT_CHKN_BIT_REG,
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE);
+
+	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
+	 * involving this register should also be added to WA batch as required.
+	 */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
+		/* WaDisableLSQCROPERFforOCL:kbl */
+		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+			   GEN8_LQSC_RO_PERF_DIS);
+
+	/* WaInsertDummyPushConstPs:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableGafsUnitClkGating:kbl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSbeCacheDispatchPortSharing:kbl */
+	WA_SET_BIT_MASKED(
+		GEN7_HALF_SLICE_CHICKEN1,
+		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	/* WaDisableLSQCROPERFforOCL:kbl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1199,6 +1271,9 @@ int init_workarounds_ring(struct intel_engine_cs *engine)
 	if (IS_BROXTON(dev))
 		return bxt_init_workarounds(engine);
 
+	if (IS_KABYLAKE(dev_priv))
+		return kbl_init_workarounds(engine);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 39386f50af87..a71cf98c655f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1034,9 +1034,9 @@ out_unlock:
 	return ret;
 }
 
-static bool ttm_bo_mem_compat(struct ttm_placement *placement,
-			      struct ttm_mem_reg *mem,
-			      uint32_t *new_flags)
+bool ttm_bo_mem_compat(struct ttm_placement *placement,
+		       struct ttm_mem_reg *mem,
+		       uint32_t *new_flags)
 {
 	int i;
 
@@ -1068,6 +1068,7 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 
 	return false;
 }
+EXPORT_SYMBOL(ttm_bo_mem_compat);
 
 int ttm_bo_validate(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index 9b078a493996..0cd889015dc5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -49,6 +49,7 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -60,7 +61,12 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, placement, interruptible, false);
+
 	if (!ret)
 		vmw_bo_pin_reserved(buf, true);
 
@@ -91,6 +97,7 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -102,6 +109,12 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
+	if (buf->pin_count > 0) {
+		ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+		goto out_unreserve;
+	}
+
 	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, interruptible,
 			      false);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
@@ -161,6 +174,7 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	struct ttm_placement placement;
 	struct ttm_place place;
 	int ret = 0;
+	uint32_t new_flags;
 
 	place = vmw_vram_placement.placement[0];
 	place.lpfn = bo->num_pages;
@@ -185,10 +199,15 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	 */
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
 	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
+	    bo->mem.start > 0 &&
+	    buf->pin_count == 0)
 		(void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
 
-	ret = ttm_bo_validate(bo, &placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(&placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, &placement, interruptible, false);
 
 	/* For some reason we didn't end up at the start of vram */
 	WARN_ON(ret == 0 && bo->offset != 0);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9fcd8200d485..8d528fcf6e96 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -233,6 +233,7 @@ static int vmw_force_iommu;
 static int vmw_restrict_iommu;
 static int vmw_force_coherent;
 static int vmw_restrict_dma_mask;
+static int vmw_assume_16bpp;
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
 static void vmw_master_init(struct vmw_master *);
@@ -249,6 +250,8 @@ MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
 module_param_named(force_coherent, vmw_force_coherent, int, 0600);
 MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
 module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
+MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
+module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
 
 static void vmw_print_capabilities(uint32_t capabilities)
@@ -660,6 +663,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
 	dev_priv->mmio_start = pci_resource_start(dev->pdev, 2);
 
+	dev_priv->assume_16bpp = !!vmw_assume_16bpp;
+
 	dev_priv->enable_fb = enable_fbdev;
 
 	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
@@ -706,6 +711,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv,
 				 SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB);
 
+		/*
+		 * Workaround for low memory 2D VMs to compensate for the
+		 * allocation taken by fbdev
+		 */
+		if (!(dev_priv->capabilities & SVGA_CAP_3D))
+			mem_size *= 2;
+
 		dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
 		dev_priv->prim_bb_mem =
 			vmw_read(dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 1980e2a28265..89fb19443a3f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -386,6 +386,7 @@ struct vmw_private {
 	spinlock_t hw_lock;
 	spinlock_t cap_lock;
 	bool has_dx;
+	bool assume_16bpp;
 
 	/*
 	 * VGA registers.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 679a4cb98ee3..d2d93959b119 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -517,28 +517,6 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info)
 
 	par->set_fb = &vfb->base;
 
-	if (!par->bo_ptr) {
-		/*
-		 * Pin before mapping. Since we don't know in what placement
-		 * to pin, call into KMS to do it for us.
-		 */
-		ret = vfb->pin(vfb);
-		if (ret) {
-			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
-				  par->vmw_bo->base.num_pages, &par->map);
-		if (ret) {
-			vfb->unpin(vfb);
-			DRM_ERROR("Could not map the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
-	}
-
 	return 0;
 }
 
@@ -601,6 +579,31 @@ static int vmw_fb_set_par(struct fb_info *info)
 	if (ret)
 		goto out_unlock;
 
+	if (!par->bo_ptr) {
+		struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb);
+
+		/*
+		 * Pin before mapping. Since we don't know in what placement
+		 * to pin, call into KMS to do it for us.
+		 */
+		ret = vfb->pin(vfb);
+		if (ret) {
+			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
+				  par->vmw_bo->base.num_pages, &par->map);
+		if (ret) {
+			vfb->unpin(vfb);
+			DRM_ERROR("Could not map the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
+	}
+
+
 	vmw_fb_dirty_mark(par, par->fb_x, par->fb_y,
 			  par->set_fb->width, par->set_fb->height);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 55231cce73a0..e29da45a2847 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1553,14 +1553,10 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector,
 		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
 	};
 	int i;
-	u32 assumed_bpp = 2;
+	u32 assumed_bpp = 4;
 
-	/*
-	 * If using screen objects, then assume 32-bpp because that's what the
-	 * SVGA device is assuming
-	 */
-	if (dev_priv->active_display_unit == vmw_du_screen_object)
-		assumed_bpp = 4;
+	if (dev_priv->assume_16bpp)
+		assumed_bpp = 2;
 
 	if (dev_priv->active_display_unit == vmw_du_screen_target) {
 		max_width  = min(max_width,  dev_priv->stdu_max_width);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index f0374f9b56ca..e57a0bad7a62 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -300,6 +300,9 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
 		break;
 	}
 
+	if (retries == RETRIES)
+		return -EINVAL;
+
 	*msg_len = reply_len;
 	*msg     = reply;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 9ca818fb034c..41932a7c4f79 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -399,8 +399,10 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 
 	WARN_ON_ONCE(!stdu->defined);
 
-	if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
-	    new_fb->height == mode->vdisplay)
+	new_vfbs = (vfb->dmabuf) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
+
+	if (new_vfbs && new_vfbs->surface->base_size.width == mode->hdisplay &&
+	    new_vfbs->surface->base_size.height == mode->vdisplay)
 		new_content_type = SAME_AS_DISPLAY;
 	else if (vfb->dmabuf)
 		new_content_type = SEPARATE_DMA;
@@ -444,7 +446,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			content_srf.mip_levels[0]     = 1;
 			content_srf.multisample_count = 0;
 		} else {
-			new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 			content_srf = *new_vfbs->surface;
 		}
 
@@ -464,7 +465,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			return ret;
 		}
 	} else if (new_content_type == SAME_AS_DISPLAY) {
-		new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 		new_display_srf = vmw_surface_reference(new_vfbs->surface);
 	}
 
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index cc6439ab3f71..041050edd809 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1268,6 +1268,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
 		}
 	}
 
+	idx = 0;
+
 	do {
 		if (msgs[idx].len == 0) {
 			ret = -EINVAL;
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 445398c314a3..b126dbaa47e3 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -912,7 +912,7 @@ static int tegra_i2c_probe(struct platform_device *pdev)
 	ret = tegra_i2c_init(i2c_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to initialize i2c controller");
-		goto unprepare_div_clk;
+		goto disable_div_clk;
 	}
 
 	ret = devm_request_irq(&pdev->dev, i2c_dev->irq,
diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c
index e33022e2d459..6e5fac6a5262 100644
--- a/drivers/i2c/i2c-boardinfo.c
+++ b/drivers/i2c/i2c-boardinfo.c
@@ -56,9 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num);
  * The board info passed can safely be __initdata, but be careful of embedded
  * pointers (for platform_data, functions, etc) since that won't be copied.
  */
-int __init
-i2c_register_board_info(int busnum,
-	struct i2c_board_info const *info, unsigned len)
+int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len)
 {
 	int status;
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 26e7c5187a58..c6a90b4a9c62 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -145,7 +145,7 @@ static int i2c_mux_reg_probe_dt(struct regmux *mux,
 		mux->data.idle_in_use = true;
 
 	/* map address from "reg" if exists */
-	if (of_address_to_resource(np, 0, &res)) {
+	if (of_address_to_resource(np, 0, &res) == 0) {
 		mux->data.reg_size = resource_size(&res);
 		mux->data.reg = devm_ioremap_resource(&pdev->dev, &res);
 		if (IS_ERR(mux->data.reg))
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index a5793c8f1590..60df4f8e81be 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -530,6 +530,7 @@ static PORT_PMA_ATTR(port_xmit_data		    , 12, 32, 192);
 static PORT_PMA_ATTR(port_rcv_data		    , 13, 32, 224);
 static PORT_PMA_ATTR(port_xmit_packets		    , 14, 32, 256);
 static PORT_PMA_ATTR(port_rcv_packets		    , 15, 32, 288);
+static PORT_PMA_ATTR(port_xmit_wait		    ,  0, 32, 320);
 
 /*
  * Counters added by extended set
@@ -560,6 +561,7 @@ static struct attribute *pma_attrs[] = {
 	&port_pma_attr_port_rcv_data.attr.attr,
 	&port_pma_attr_port_xmit_packets.attr.attr,
 	&port_pma_attr_port_rcv_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	NULL
 };
 
@@ -579,6 +581,7 @@ static struct attribute *pma_attrs_ext[] = {
 	&port_pma_attr_ext_port_xmit_data.attr.attr,
 	&port_pma_attr_ext_port_rcv_data.attr.attr,
 	&port_pma_attr_ext_port_xmit_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	&port_pma_attr_ext_port_rcv_packets.attr.attr,
 	&port_pma_attr_ext_unicast_rcv_packets.attr.attr,
 	&port_pma_attr_ext_unicast_xmit_packets.attr.attr,
@@ -604,6 +607,7 @@ static struct attribute *pma_attrs_noietf[] = {
 	&port_pma_attr_ext_port_rcv_data.attr.attr,
 	&port_pma_attr_ext_port_xmit_packets.attr.attr,
 	&port_pma_attr_ext_port_rcv_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	NULL
 };
 
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index f5de85178055..dad4d0ebbdff 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -14113,8 +14113,14 @@ static int init_asic_data(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
 	struct hfi1_devdata *tmp, *peer = NULL;
+	struct hfi1_asic_data *asic_data;
 	int ret = 0;
 
+	/* pre-allocate the asic structure in case we are the first device */
+	asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+	if (!asic_data)
+		return -ENOMEM;
+
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	/* Find our peer device */
 	list_for_each_entry(tmp, &hfi1_dev_list, list) {
@@ -14126,18 +14132,14 @@ static int init_asic_data(struct hfi1_devdata *dd)
 	}
 
 	if (peer) {
+		/* use already allocated structure */
 		dd->asic_data = peer->asic_data;
+		kfree(asic_data);
 	} else {
-		dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
-		if (!dd->asic_data) {
-			ret = -ENOMEM;
-			goto done;
-		}
+		dd->asic_data = asic_data;
 		mutex_init(&dd->asic_data->asic_resource_mutex);
 	}
 	dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
-
-done:
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1e503ad0bebb..be91f6fa1c87 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	u32 tlen = packet->tlen;
 	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
-	bool sc4_bit = has_sc4_bit(packet);
-	u8 sc;
+	u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
 	u32 bth1;
 	int is_mcast;
 	struct ib_grh *grh = NULL;
@@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		 */
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-		u8 sl, sc5;
+		u8 sl;
 
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 		sl = ibp->sc_to_sl[sc5];
 
 		process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
@@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 
 	if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
 		u16 slid = be16_to_cpu(hdr->lrh[3]);
-		u8 sc5;
-
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 
 		return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
 	}
@@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		if (qp->ibqp.qp_num > 1) {
 			struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 			u16 slid;
-			u8 sc5;
-
-			sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-			sc5 |= sc4_bit;
 
 			slid = be16_to_cpu(hdr->lrh[3]);
 			if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
@@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		/* Received on QP0, and so by definition, this is an SMP */
 		struct opa_smp *smp = (struct opa_smp *)data;
 		u16 slid = be16_to_cpu(hdr->lrh[3]);
-		u8 sc5;
-
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 
 		if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
 			goto drop;
@@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	}
 
 	wc.slid = be16_to_cpu(hdr->lrh[3]);
-	sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-	sc |= sc4_bit;
-	wc.sl = ibp->sc_to_sl[sc];
+	wc.sl = ibp->sc_to_sl[sc5];
 
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index c963cad92f5a..6e9081380a27 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -600,8 +600,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
 	cqp_init_info.scratch_array = cqp->scratch_array;
 	status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
 	if (status) {
-		i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n",
-			     status, maj_err, min_err);
+		i40iw_pr_err("cqp init status %d\n", status);
 		goto exit;
 	}
 	status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 33959ed14563..283b64c942ee 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1474,6 +1474,7 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr
 	info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
 	info->pd_id = iwpd->sc_pd.pd_id;
 	info->total_len = iwmr->length;
+	info->remote_access = true;
 	cqp_info->cqp_cmd = OP_ALLOC_STAG;
 	cqp_info->post_sq = 1;
 	cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 3438e98c145a..a529a4535457 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1431,6 +1431,9 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 	int ep_irq_in_idx;
 	int i, error;
 
+	if (intf->cur_altsetting->desc.bNumEndpoints != 2)
+		return -ENODEV;
+
 	for (i = 0; xpad_device[i].idVendor; i++) {
 		if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) &&
 		    (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct))
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index b368b0515c5a..253df96be427 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -157,11 +157,11 @@ static int rmi_function_match(struct device *dev, struct device_driver *drv)
 static void rmi_function_of_probe(struct rmi_function *fn)
 {
 	char of_name[9];
+	struct device_node *node = fn->rmi_dev->xport->dev->of_node;
 
 	snprintf(of_name, sizeof(of_name), "rmi4-f%02x",
 		fn->fd.function_number);
-	fn->dev.of_node = of_find_node_by_name(
-				fn->rmi_dev->xport->dev->of_node, of_name);
+	fn->dev.of_node = of_get_child_by_name(node, of_name);
 }
 #else
 static inline void rmi_function_of_probe(struct rmi_function *fn)
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index 8dd3fb5e1f94..88e91559c84e 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -66,7 +66,7 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
 	struct rmi_device *rmi_dev = fn->rmi_dev;
 	int ret;
 	int offset;
-	u8 buf[14];
+	u8 buf[15];
 	int pitch_x = 0;
 	int pitch_y = 0;
 	int clip_x_low = 0;
@@ -86,9 +86,10 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
 
 	offset = rmi_register_desc_calc_reg_offset(&f12->control_reg_desc, 8);
 
-	if (item->reg_size > 14) {
-		dev_err(&fn->dev, "F12 control8 should be 14 bytes, not: %ld\n",
-			item->reg_size);
+	if (item->reg_size > sizeof(buf)) {
+		dev_err(&fn->dev,
+			"F12 control8 should be no bigger than %zd bytes, not: %ld\n",
+			sizeof(buf), item->reg_size);
 		return -ENODEV;
 	}
 
diff --git a/drivers/input/touchscreen/ts4800-ts.c b/drivers/input/touchscreen/ts4800-ts.c
index 3c3dd78303be..fed73eeb47b3 100644
--- a/drivers/input/touchscreen/ts4800-ts.c
+++ b/drivers/input/touchscreen/ts4800-ts.c
@@ -118,6 +118,13 @@ static int ts4800_parse_dt(struct platform_device *pdev,
 		return -ENODEV;
 	}
 
+	ts->regmap = syscon_node_to_regmap(syscon_np);
+	of_node_put(syscon_np);
+	if (IS_ERR(ts->regmap)) {
+		dev_err(dev, "cannot get parent's regmap\n");
+		return PTR_ERR(ts->regmap);
+	}
+
 	error = of_property_read_u32_index(np, "syscon", 1, &reg);
 	if (error < 0) {
 		dev_err(dev, "no offset in syscon\n");
@@ -134,12 +141,6 @@ static int ts4800_parse_dt(struct platform_device *pdev,
 
 	ts->bit = BIT(bit);
 
-	ts->regmap = syscon_node_to_regmap(syscon_np);
-	if (IS_ERR(ts->regmap)) {
-		dev_err(dev, "cannot get parent's regmap\n");
-		return PTR_ERR(ts->regmap);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
index 7295c198aa08..6fe55d598fac 100644
--- a/drivers/input/touchscreen/tsc2004.c
+++ b/drivers/input/touchscreen/tsc2004.c
@@ -22,6 +22,11 @@
 #include <linux/regmap.h>
 #include "tsc200x-core.h"
 
+static const struct input_id tsc2004_input_id = {
+	.bustype = BUS_I2C,
+	.product = 2004,
+};
+
 static int tsc2004_cmd(struct device *dev, u8 cmd)
 {
 	u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
@@ -42,7 +47,7 @@ static int tsc2004_probe(struct i2c_client *i2c,
 			 const struct i2c_device_id *id)
 
 {
-	return tsc200x_probe(&i2c->dev, i2c->irq, BUS_I2C,
+	return tsc200x_probe(&i2c->dev, i2c->irq, &tsc2004_input_id,
 			     devm_regmap_init_i2c(i2c, &tsc200x_regmap_config),
 			     tsc2004_cmd);
 }
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index b9f593dfd2ef..f2c5f0e47f77 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -24,6 +24,11 @@
 #include <linux/regmap.h>
 #include "tsc200x-core.h"
 
+static const struct input_id tsc2005_input_id = {
+	.bustype = BUS_SPI,
+	.product = 2005,
+};
+
 static int tsc2005_cmd(struct device *dev, u8 cmd)
 {
 	u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
@@ -62,7 +67,7 @@ static int tsc2005_probe(struct spi_device *spi)
 	if (error)
 		return error;
 
-	return tsc200x_probe(&spi->dev, spi->irq, BUS_SPI,
+	return tsc200x_probe(&spi->dev, spi->irq, &tsc2005_input_id,
 			     devm_regmap_init_spi(spi, &tsc200x_regmap_config),
 			     tsc2005_cmd);
 }
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index 15240c1ee850..dfa7f1c4f545 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -450,7 +450,7 @@ static void tsc200x_close(struct input_dev *input)
 	mutex_unlock(&ts->mutex);
 }
 
-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
+int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		  struct regmap *regmap,
 		  int (*tsc200x_cmd)(struct device *dev, u8 cmd))
 {
@@ -547,9 +547,18 @@ int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
 	snprintf(ts->phys, sizeof(ts->phys),
 		 "%s/input-ts", dev_name(dev));
 
-	input_dev->name = "TSC200X touchscreen";
+	if (tsc_id->product == 2004) {
+		input_dev->name = "TSC200X touchscreen";
+	} else {
+		input_dev->name = devm_kasprintf(dev, GFP_KERNEL,
+						 "TSC%04d touchscreen",
+						 tsc_id->product);
+		if (!input_dev->name)
+			return -ENOMEM;
+	}
+
 	input_dev->phys = ts->phys;
-	input_dev->id.bustype = bustype;
+	input_dev->id = *tsc_id;
 	input_dev->dev.parent = dev;
 	input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
 	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
index 7a482d102614..49a63a3c6840 100644
--- a/drivers/input/touchscreen/tsc200x-core.h
+++ b/drivers/input/touchscreen/tsc200x-core.h
@@ -70,7 +70,7 @@
 extern const struct regmap_config tsc200x_regmap_config;
 extern const struct dev_pm_ops tsc200x_pm_ops;
 
-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
+int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		  struct regmap *regmap,
 		  int (*tsc200x_cmd)(struct device *dev, u8 cmd));
 int tsc200x_remove(struct device *dev);
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index 0c9191cf324d..b6fc4bde79de 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -155,6 +155,7 @@ static void parse_multi_touch(struct w8001 *w8001)
 		bool touch = data[0] & (1 << i);
 
 		input_mt_slot(dev, i);
+		input_mt_report_slot_state(dev, MT_TOOL_FINGER, touch);
 		if (touch) {
 			x = (data[6 * i + 1] << 7) | data[6 * i + 2];
 			y = (data[6 * i + 3] << 7) | data[6 * i + 4];
@@ -522,6 +523,8 @@ static int w8001_setup_touch(struct w8001 *w8001, char *basename,
 					0, touch.x, 0, 0);
 		input_set_abs_params(dev, ABS_MT_POSITION_Y,
 					0, touch.y, 0, 0);
+		input_set_abs_params(dev, ABS_MT_TOOL_TYPE,
+					0, MT_TOOL_MAX, 0, 0);
 
 		strlcat(basename, " 2FG", basename_sz);
 		if (w8001->max_pen_x && w8001->max_pen_y)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index ad0860383cb3..d432ca828472 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -89,8 +89,8 @@ config MSM_IOMMU
 	bool "MSM IOMMU Support"
 	depends on ARM
 	depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST
-	depends on BROKEN
 	select IOMMU_API
+	select IOMMU_IO_PGTABLE_ARMV7S
 	help
 	  Support for the IOMMUs found on certain Qualcomm SOCs.
 	  These IOMMUs allow virtualization of the address space used by most
@@ -111,6 +111,7 @@ config AMD_IOMMU
 	select PCI_PRI
 	select PCI_PASID
 	select IOMMU_API
+	select IOMMU_IOVA
 	depends on X86_64 && PCI && ACPI
 	---help---
 	  With this option you can enable support for AMD IOMMU hardware in
@@ -343,4 +344,22 @@ config MTK_IOMMU
 
 	  If unsure, say N here.
 
+config MTK_IOMMU_V1
+	bool "MTK IOMMU Version 1 (M4U gen1) Support"
+	depends on ARM
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select ARM_DMA_USE_IOMMU
+	select IOMMU_API
+	select MEMORY
+	select MTK_SMI
+	select COMMON_CLK_MT2701_MMSYS
+	select COMMON_CLK_MT2701_IMGSYS
+	select COMMON_CLK_MT2701_VDECSYS
+	help
+	  Support for the M4U on certain Mediatek SoCs. M4U generation 1 HW is
+	  Multimedia Memory Managememt Unit. This option enables remapping of
+	  DMA memory accesses for the multimedia subsystem.
+
+	  if unsure, say N here.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6edb31bf8c6..195f7b997d8e 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU)	+= of_iommu.o
-obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
+obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
@@ -18,6 +18,7 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
 obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
+obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
 obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
 obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 634f636393d5..33c177ba93be 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/amba/bus.h>
+#include <linux/platform_device.h>
 #include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
@@ -38,6 +39,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/irqdomain.h>
 #include <linux/percpu.h>
+#include <linux/iova.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -56,6 +58,17 @@
 
 #define LOOP_TIMEOUT	100000
 
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN		(1)
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
+
+/* Reserved IOVA ranges */
+#define MSI_RANGE_START		(0xfee00000)
+#define MSI_RANGE_END		(0xfeefffff)
+#define HT_RANGE_START		(0xfd00000000ULL)
+#define HT_RANGE_END		(0xffffffffffULL)
+
 /*
  * This bitmap is used to advertise the page sizes our hardware support
  * to the IOMMU core, which will then use this information to split
@@ -76,6 +89,25 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+	struct dma_ops_domain *dma_dom;
+};
+
+struct flush_queue {
+	spinlock_t lock;
+	unsigned next;
+	struct flush_queue_entry *entries;
+};
+
+DEFINE_PER_CPU(struct flush_queue, flush_queue);
+
+static atomic_t queue_timer_on;
+static struct timer_list queue_timer;
+
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -121,44 +153,19 @@ static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
 /*
- * For dynamic growth the aperture size is split into ranges of 128MB of
- * DMA address space each. This struct represents one such range.
- */
-struct aperture_range {
-
-	spinlock_t bitmap_lock;
-
-	/* address allocation bitmap */
-	unsigned long *bitmap;
-	unsigned long offset;
-	unsigned long next_bit;
-
-	/*
-	 * Array of PTE pages for the aperture. In this array we save all the
-	 * leaf pages of the domain page table used for the aperture. This way
-	 * we don't need to walk the page table to find a specific PTE. We can
-	 * just calculate its address in constant time.
-	 */
-	u64 *pte_pages[64];
-};
-
-/*
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
 	/* generic protection domain information */
 	struct protection_domain domain;
 
-	/* size of the aperture for the mappings */
-	unsigned long aperture_size;
-
-	/* aperture index we start searching for free addresses */
-	u32 __percpu *next_index;
-
-	/* address space relevant data */
-	struct aperture_range *aperture[APERTURE_MAX_RANGES];
+	/* IOVA RB-Tree */
+	struct iova_domain iovad;
 };
 
+static struct iova_domain reserved_iova_ranges;
+static struct lock_class_key reserved_rbtree_key;
+
 /****************************************************************************
  *
  * Helper functions
@@ -224,6 +231,12 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 	return container_of(dom, struct protection_domain, domain);
 }
 
+static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain)
+{
+	BUG_ON(domain->flags != PD_DMA_OPS_MASK);
+	return container_of(domain, struct dma_ops_domain, domain);
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -391,43 +404,6 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
 }
 
 /*
- * This function actually applies the mapping to the page table of the
- * dma_ops domain.
- */
-static void alloc_unity_mapping(struct dma_ops_domain *dma_dom,
-				struct unity_map_entry *e)
-{
-	u64 addr;
-
-	for (addr = e->address_start; addr < e->address_end;
-	     addr += PAGE_SIZE) {
-		if (addr < dma_dom->aperture_size)
-			__set_bit(addr >> PAGE_SHIFT,
-				  dma_dom->aperture[0]->bitmap);
-	}
-}
-
-/*
- * Inits the unity mappings required for a specific device
- */
-static void init_unity_mappings_for_device(struct device *dev,
-					   struct dma_ops_domain *dma_dom)
-{
-	struct unity_map_entry *e;
-	int devid;
-
-	devid = get_device_id(dev);
-	if (devid < 0)
-		return;
-
-	list_for_each_entry(e, &amd_iommu_unity_map, list) {
-		if (!(devid >= e->devid_start && devid <= e->devid_end))
-			continue;
-		alloc_unity_mapping(dma_dom, e);
-	}
-}
-
-/*
  * This function checks if the driver got a valid device from the caller to
  * avoid dereferencing invalid pointers.
  */
@@ -454,22 +430,12 @@ static bool check_device(struct device *dev)
 
 static void init_iommu_group(struct device *dev)
 {
-	struct dma_ops_domain *dma_domain;
-	struct iommu_domain *domain;
 	struct iommu_group *group;
 
 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group))
 		return;
 
-	domain = iommu_group_default_domain(group);
-	if (!domain)
-		goto out;
-
-	dma_domain = to_pdomain(domain)->priv;
-
-	init_unity_mappings_for_device(dev, dma_domain);
-out:
 	iommu_group_put(group);
 }
 
@@ -1220,7 +1186,7 @@ static void domain_flush_complete(struct protection_domain *domain)
 	int i;
 
 	for (i = 0; i < amd_iommus_present; ++i) {
-		if (!domain->dev_iommu[i])
+		if (domain && !domain->dev_iommu[i])
 			continue;
 
 		/*
@@ -1397,8 +1363,9 @@ static u64 *fetch_pte(struct protection_domain *domain,
 static int iommu_map_page(struct protection_domain *dom,
 			  unsigned long bus_addr,
 			  unsigned long phys_addr,
+			  unsigned long page_size,
 			  int prot,
-			  unsigned long page_size)
+			  gfp_t gfp)
 {
 	u64 __pte, *pte;
 	int i, count;
@@ -1410,7 +1377,7 @@ static int iommu_map_page(struct protection_domain *dom,
 		return -EINVAL;
 
 	count = PAGE_SIZE_PTE_COUNT(page_size);
-	pte   = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
+	pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
 
 	if (!pte)
 		return -ENOMEM;
@@ -1474,320 +1441,37 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
 /****************************************************************************
  *
  * The next functions belong to the address allocator for the dma_ops
- * interface functions. They work like the allocators in the other IOMMU
- * drivers. Its basically a bitmap which marks the allocated pages in
- * the aperture. Maybe it could be enhanced in the future to a more
- * efficient allocator.
+ * interface functions.
  *
  ****************************************************************************/
 
-/*
- * The address allocator core functions.
- *
- * called with domain->lock held
- */
 
-/*
- * Used to reserve address ranges in the aperture (e.g. for exclusion
- * ranges.
- */
-static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
-				      unsigned long start_page,
-				      unsigned int pages)
+static unsigned long dma_ops_alloc_iova(struct device *dev,
+					struct dma_ops_domain *dma_dom,
+					unsigned int pages, u64 dma_mask)
 {
-	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
-
-	if (start_page + pages > last_page)
-		pages = last_page - start_page;
-
-	for (i = start_page; i < start_page + pages; ++i) {
-		int index = i / APERTURE_RANGE_PAGES;
-		int page  = i % APERTURE_RANGE_PAGES;
-		__set_bit(page, dom->aperture[index]->bitmap);
-	}
-}
+	unsigned long pfn = 0;
 
-/*
- * This function is used to add a new aperture range to an existing
- * aperture in case of dma_ops domain allocation or address allocation
- * failure.
- */
-static int alloc_new_range(struct dma_ops_domain *dma_dom,
-			   bool populate, gfp_t gfp)
-{
-	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	unsigned long i, old_size, pte_pgsize;
-	struct aperture_range *range;
-	struct amd_iommu *iommu;
-	unsigned long flags;
+	pages = __roundup_pow_of_two(pages);
 
-#ifdef CONFIG_IOMMU_STRESS
-	populate = false;
-#endif
+	if (dma_mask > DMA_BIT_MASK(32))
+		pfn = alloc_iova_fast(&dma_dom->iovad, pages,
+				      IOVA_PFN(DMA_BIT_MASK(32)));
 
-	if (index >= APERTURE_MAX_RANGES)
-		return -ENOMEM;
-
-	range = kzalloc(sizeof(struct aperture_range), gfp);
-	if (!range)
-		return -ENOMEM;
-
-	range->bitmap = (void *)get_zeroed_page(gfp);
-	if (!range->bitmap)
-		goto out_free;
-
-	range->offset = dma_dom->aperture_size;
-
-	spin_lock_init(&range->bitmap_lock);
-
-	if (populate) {
-		unsigned long address = dma_dom->aperture_size;
-		int i, num_ptes = APERTURE_RANGE_PAGES / 512;
-		u64 *pte, *pte_page;
-
-		for (i = 0; i < num_ptes; ++i) {
-			pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
-					&pte_page, gfp);
-			if (!pte)
-				goto out_free;
-
-			range->pte_pages[i] = pte_page;
-
-			address += APERTURE_RANGE_SIZE / 64;
-		}
-	}
+	if (!pfn)
+		pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask));
 
-	spin_lock_irqsave(&dma_dom->domain.lock, flags);
-
-	/* First take the bitmap_lock and then publish the range */
-	spin_lock(&range->bitmap_lock);
-
-	old_size                 = dma_dom->aperture_size;
-	dma_dom->aperture[index] = range;
-	dma_dom->aperture_size  += APERTURE_RANGE_SIZE;
-
-	/* Reserve address range used for MSI messages */
-	if (old_size < MSI_ADDR_BASE_LO &&
-	    dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
-		unsigned long spage;
-		int pages;
-
-		pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
-		spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
-
-		dma_ops_reserve_addresses(dma_dom, spage, pages);
-	}
-
-	/* Initialize the exclusion range if necessary */
-	for_each_iommu(iommu) {
-		if (iommu->exclusion_start &&
-		    iommu->exclusion_start >= dma_dom->aperture[index]->offset
-		    && iommu->exclusion_start < dma_dom->aperture_size) {
-			unsigned long startpage;
-			int pages = iommu_num_pages(iommu->exclusion_start,
-						    iommu->exclusion_length,
-						    PAGE_SIZE);
-			startpage = iommu->exclusion_start >> PAGE_SHIFT;
-			dma_ops_reserve_addresses(dma_dom, startpage, pages);
-		}
-	}
-
-	/*
-	 * Check for areas already mapped as present in the new aperture
-	 * range and mark those pages as reserved in the allocator. Such
-	 * mappings may already exist as a result of requested unity
-	 * mappings for devices.
-	 */
-	for (i = dma_dom->aperture[index]->offset;
-	     i < dma_dom->aperture_size;
-	     i += pte_pgsize) {
-		u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize);
-		if (!pte || !IOMMU_PTE_PRESENT(*pte))
-			continue;
-
-		dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT,
-					  pte_pgsize >> 12);
-	}
-
-	update_domain(&dma_dom->domain);
-
-	spin_unlock(&range->bitmap_lock);
-
-	spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
-
-	return 0;
-
-out_free:
-	update_domain(&dma_dom->domain);
-
-	free_page((unsigned long)range->bitmap);
-
-	kfree(range);
-
-	return -ENOMEM;
+	return (pfn << PAGE_SHIFT);
 }
 
-static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
-					 struct aperture_range *range,
-					 unsigned long pages,
-					 unsigned long dma_mask,
-					 unsigned long boundary_size,
-					 unsigned long align_mask,
-					 bool trylock)
+static void dma_ops_free_iova(struct dma_ops_domain *dma_dom,
+			      unsigned long address,
+			      unsigned int pages)
 {
-	unsigned long offset, limit, flags;
-	dma_addr_t address;
-	bool flush = false;
-
-	offset = range->offset >> PAGE_SHIFT;
-	limit  = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
-					dma_mask >> PAGE_SHIFT);
-
-	if (trylock) {
-		if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
-			return -1;
-	} else {
-		spin_lock_irqsave(&range->bitmap_lock, flags);
-	}
-
-	address = iommu_area_alloc(range->bitmap, limit, range->next_bit,
-				   pages, offset, boundary_size, align_mask);
-	if (address == -1) {
-		/* Nothing found, retry one time */
-		address = iommu_area_alloc(range->bitmap, limit,
-					   0, pages, offset, boundary_size,
-					   align_mask);
-		flush = true;
-	}
-
-	if (address != -1)
-		range->next_bit = address + pages;
-
-	spin_unlock_irqrestore(&range->bitmap_lock, flags);
-
-	if (flush) {
-		domain_flush_tlb(&dom->domain);
-		domain_flush_complete(&dom->domain);
-	}
-
-	return address;
-}
-
-static unsigned long dma_ops_area_alloc(struct device *dev,
-					struct dma_ops_domain *dom,
-					unsigned int pages,
-					unsigned long align_mask,
-					u64 dma_mask)
-{
-	unsigned long boundary_size, mask;
-	unsigned long address = -1;
-	bool first = true;
-	u32 start, i;
-
-	preempt_disable();
-
-	mask = dma_get_seg_boundary(dev);
-
-again:
-	start = this_cpu_read(*dom->next_index);
-
-	/* Sanity check - is it really necessary? */
-	if (unlikely(start > APERTURE_MAX_RANGES)) {
-		start = 0;
-		this_cpu_write(*dom->next_index, 0);
-	}
-
-	boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
-				   1UL << (BITS_PER_LONG - PAGE_SHIFT);
-
-	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
-		struct aperture_range *range;
-		int index;
-
-		index = (start + i) % APERTURE_MAX_RANGES;
-
-		range = dom->aperture[index];
-
-		if (!range || range->offset >= dma_mask)
-			continue;
-
-		address = dma_ops_aperture_alloc(dom, range, pages,
-						 dma_mask, boundary_size,
-						 align_mask, first);
-		if (address != -1) {
-			address = range->offset + (address << PAGE_SHIFT);
-			this_cpu_write(*dom->next_index, index);
-			break;
-		}
-	}
-
-	if (address == -1 && first) {
-		first = false;
-		goto again;
-	}
-
-	preempt_enable();
-
-	return address;
-}
-
-static unsigned long dma_ops_alloc_addresses(struct device *dev,
-					     struct dma_ops_domain *dom,
-					     unsigned int pages,
-					     unsigned long align_mask,
-					     u64 dma_mask)
-{
-	unsigned long address = -1;
-
-	while (address == -1) {
-		address = dma_ops_area_alloc(dev, dom, pages,
-					     align_mask, dma_mask);
-
-		if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
-			break;
-	}
-
-	if (unlikely(address == -1))
-		address = DMA_ERROR_CODE;
-
-	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
-
-	return address;
-}
-
-/*
- * The address free function.
- *
- * called with domain->lock held
- */
-static void dma_ops_free_addresses(struct dma_ops_domain *dom,
-				   unsigned long address,
-				   unsigned int pages)
-{
-	unsigned i = address >> APERTURE_RANGE_SHIFT;
-	struct aperture_range *range = dom->aperture[i];
-	unsigned long flags;
-
-	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
-
-#ifdef CONFIG_IOMMU_STRESS
-	if (i < 4)
-		return;
-#endif
-
-	if (amd_iommu_unmap_flush) {
-		domain_flush_tlb(&dom->domain);
-		domain_flush_complete(&dom->domain);
-	}
-
-	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
-
-	spin_lock_irqsave(&range->bitmap_lock, flags);
-	if (address + pages > range->next_bit)
-		range->next_bit = address + pages;
-	bitmap_clear(range->bitmap, address, pages);
-	spin_unlock_irqrestore(&range->bitmap_lock, flags);
+	pages = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
 
+	free_iova_fast(&dma_dom->iovad, address, pages);
 }
 
 /****************************************************************************
@@ -1961,44 +1645,18 @@ static void free_gcr3_table(struct protection_domain *domain)
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
-	int i;
-
 	if (!dom)
 		return;
 
-	free_percpu(dom->next_index);
-
 	del_domain_from_list(&dom->domain);
 
-	free_pagetable(&dom->domain);
+	put_iova_domain(&dom->iovad);
 
-	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
-		if (!dom->aperture[i])
-			continue;
-		free_page((unsigned long)dom->aperture[i]->bitmap);
-		kfree(dom->aperture[i]);
-	}
+	free_pagetable(&dom->domain);
 
 	kfree(dom);
 }
 
-static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
-					  int max_apertures)
-{
-	int ret, i, apertures;
-
-	apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	ret       = 0;
-
-	for (i = apertures; i < max_apertures; ++i) {
-		ret = alloc_new_range(dma_dom, false, GFP_KERNEL);
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
 /*
  * Allocates a new protection domain usable for the dma_ops functions.
  * It also initializes the page table and the address allocator data
@@ -2007,7 +1665,6 @@ static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
 static struct dma_ops_domain *dma_ops_domain_alloc(void)
 {
 	struct dma_ops_domain *dma_dom;
-	int cpu;
 
 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
 	if (!dma_dom)
@@ -2016,30 +1673,19 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (protection_domain_init(&dma_dom->domain))
 		goto free_dma_dom;
 
-	dma_dom->next_index = alloc_percpu(u32);
-	if (!dma_dom->next_index)
-		goto free_dma_dom;
-
-	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
+	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
 	dma_dom->domain.flags = PD_DMA_OPS_MASK;
-	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
 
-	add_domain_to_list(&dma_dom->domain);
+	init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
+			 IOVA_START_PFN, DMA_32BIT_PFN);
 
-	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
-		goto free_dma_dom;
-
-	/*
-	 * mark the first page as allocated so we never return 0 as
-	 * a valid dma-address. So we can use 0 as error value
-	 */
-	dma_dom->aperture[0]->bitmap[0] = 1;
+	/* Initialize reserved ranges */
+	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(dma_dom->next_index, cpu) = 0;
+	add_domain_to_list(&dma_dom->domain);
 
 	return dma_dom;
 
@@ -2482,6 +2128,92 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
+static void __queue_flush(struct flush_queue *queue)
+{
+	struct protection_domain *domain;
+	unsigned long flags;
+	int idx;
+
+	/* First flush TLB of all known domains */
+	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+	list_for_each_entry(domain, &amd_iommu_pd_list, list)
+		domain_flush_tlb(domain);
+	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+
+	/* Wait until flushes have completed */
+	domain_flush_complete(NULL);
+
+	for (idx = 0; idx < queue->next; ++idx) {
+		struct flush_queue_entry *entry;
+
+		entry = queue->entries + idx;
+
+		free_iova_fast(&entry->dma_dom->iovad,
+				entry->iova_pfn,
+				entry->pages);
+
+		/* Not really necessary, just to make sure we catch any bugs */
+		entry->dma_dom = NULL;
+	}
+
+	queue->next = 0;
+}
+
+static void queue_flush_all(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+		unsigned long flags;
+
+		queue = per_cpu_ptr(&flush_queue, cpu);
+		spin_lock_irqsave(&queue->lock, flags);
+		if (queue->next > 0)
+			__queue_flush(queue);
+		spin_unlock_irqrestore(&queue->lock, flags);
+	}
+}
+
+static void queue_flush_timeout(unsigned long unsused)
+{
+	atomic_set(&queue_timer_on, 0);
+	queue_flush_all();
+}
+
+static void queue_add(struct dma_ops_domain *dma_dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue_entry *entry;
+	struct flush_queue *queue;
+	unsigned long flags;
+	int idx;
+
+	pages     = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(&flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
+
+	if (queue->next == FLUSH_QUEUE_SIZE)
+		__queue_flush(queue);
+
+	idx   = queue->next++;
+	entry = queue->entries + idx;
+
+	entry->iova_pfn = address;
+	entry->pages    = pages;
+	entry->dma_dom  = dma_dom;
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
+		mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
+
+	put_cpu_ptr(&flush_queue);
+}
+
+
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2492,16 +2224,11 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
 static struct protection_domain *get_domain(struct device *dev)
 {
 	struct protection_domain *domain;
-	struct iommu_domain *io_domain;
 
 	if (!check_device(dev))
 		return ERR_PTR(-EINVAL);
 
-	io_domain = iommu_get_domain_for_dev(dev);
-	if (!io_domain)
-		return NULL;
-
-	domain = to_pdomain(io_domain);
+	domain = get_dev_data(dev)->domain;
 	if (!dma_ops_domain(domain))
 		return ERR_PTR(-EBUSY);
 
@@ -2512,8 +2239,15 @@ static void update_device_table(struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 
-	list_for_each_entry(dev_data, &domain->dev_list, list)
+	list_for_each_entry(dev_data, &domain->dev_list, list) {
 		set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
+
+		if (dev_data->devid == dev_data->alias)
+			continue;
+
+		/* There is an alias, update device table entry for it */
+		set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled);
+	}
 }
 
 static void update_domain(struct protection_domain *domain)
@@ -2529,94 +2263,17 @@ static void update_domain(struct protection_domain *domain)
 	domain->updated = false;
 }
 
-/*
- * This function fetches the PTE for a given address in the aperture
- */
-static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
-			    unsigned long address)
-{
-	struct aperture_range *aperture;
-	u64 *pte, *pte_page;
-
-	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
-	if (!aperture)
-		return NULL;
-
-	pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
-	if (!pte) {
-		pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
-				GFP_ATOMIC);
-		aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
-	} else
-		pte += PM_LEVEL_INDEX(0, address);
-
-	update_domain(&dom->domain);
-
-	return pte;
-}
-
-/*
- * This is the generic map function. It maps one 4kb page at paddr to
- * the given address in the DMA address space for the domain.
- */
-static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
-				     unsigned long address,
-				     phys_addr_t paddr,
-				     int direction)
+static int dir2prot(enum dma_data_direction direction)
 {
-	u64 *pte, __pte;
-
-	WARN_ON(address > dom->aperture_size);
-
-	paddr &= PAGE_MASK;
-
-	pte  = dma_ops_get_pte(dom, address);
-	if (!pte)
-		return DMA_ERROR_CODE;
-
-	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
-
 	if (direction == DMA_TO_DEVICE)
-		__pte |= IOMMU_PTE_IR;
+		return IOMMU_PROT_IR;
 	else if (direction == DMA_FROM_DEVICE)
-		__pte |= IOMMU_PTE_IW;
+		return IOMMU_PROT_IW;
 	else if (direction == DMA_BIDIRECTIONAL)
-		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
-
-	WARN_ON_ONCE(*pte);
-
-	*pte = __pte;
-
-	return (dma_addr_t)address;
-}
-
-/*
- * The generic unmapping function for on page in the DMA address space.
- */
-static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
-				 unsigned long address)
-{
-	struct aperture_range *aperture;
-	u64 *pte;
-
-	if (address >= dom->aperture_size)
-		return;
-
-	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
-	if (!aperture)
-		return;
-
-	pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
-	if (!pte)
-		return;
-
-	pte += PM_LEVEL_INDEX(0, address);
-
-	WARN_ON_ONCE(!*pte);
-
-	*pte = 0ULL;
+		return IOMMU_PROT_IW | IOMMU_PROT_IR;
+	else
+		return 0;
 }
-
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
@@ -2627,32 +2284,29 @@ static dma_addr_t __map_single(struct device *dev,
 			       struct dma_ops_domain *dma_dom,
 			       phys_addr_t paddr,
 			       size_t size,
-			       int dir,
-			       bool align,
+			       enum dma_data_direction direction,
 			       u64 dma_mask)
 {
 	dma_addr_t offset = paddr & ~PAGE_MASK;
 	dma_addr_t address, start, ret;
 	unsigned int pages;
-	unsigned long align_mask = 0;
+	int prot = 0;
 	int i;
 
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
-	if (align)
-		align_mask = (1UL << get_order(size)) - 1;
-
-	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
-					  dma_mask);
-
+	address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
 	if (address == DMA_ERROR_CODE)
 		goto out;
 
+	prot = dir2prot(direction);
+
 	start = address;
 	for (i = 0; i < pages; ++i) {
-		ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
-		if (ret == DMA_ERROR_CODE)
+		ret = iommu_map_page(&dma_dom->domain, start, paddr,
+				     PAGE_SIZE, prot, GFP_ATOMIC);
+		if (ret)
 			goto out_unmap;
 
 		paddr += PAGE_SIZE;
@@ -2672,10 +2326,13 @@ out_unmap:
 
 	for (--i; i >= 0; --i) {
 		start -= PAGE_SIZE;
-		dma_ops_domain_unmap(dma_dom, start);
+		iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
 	}
 
-	dma_ops_free_addresses(dma_dom, address, pages);
+	domain_flush_tlb(&dma_dom->domain);
+	domain_flush_complete(&dma_dom->domain);
+
+	dma_ops_free_iova(dma_dom, address, pages);
 
 	return DMA_ERROR_CODE;
 }
@@ -2693,21 +2350,23 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	dma_addr_t i, start;
 	unsigned int pages;
 
-	if ((dma_addr == DMA_ERROR_CODE) ||
-	    (dma_addr + size > dma_dom->aperture_size))
-		return;
-
 	flush_addr = dma_addr;
 	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 	dma_addr &= PAGE_MASK;
 	start = dma_addr;
 
 	for (i = 0; i < pages; ++i) {
-		dma_ops_domain_unmap(dma_dom, start);
+		iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
 		start += PAGE_SIZE;
 	}
 
-	dma_ops_free_addresses(dma_dom, dma_addr, pages);
+	if (amd_iommu_unmap_flush) {
+		dma_ops_free_iova(dma_dom, dma_addr, pages);
+		domain_flush_tlb(&dma_dom->domain);
+		domain_flush_complete(&dma_dom->domain);
+	} else {
+		queue_add(dma_dom, dma_addr, pages);
+	}
 }
 
 /*
@@ -2720,6 +2379,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 {
 	phys_addr_t paddr = page_to_phys(page) + offset;
 	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
 	u64 dma_mask;
 
 	domain = get_domain(dev);
@@ -2729,9 +2389,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 		return DMA_ERROR_CODE;
 
 	dma_mask = *dev->dma_mask;
+	dma_dom = to_dma_ops_domain(domain);
 
-	return __map_single(dev, domain->priv, paddr, size, dir, false,
-			    dma_mask);
+	return __map_single(dev, dma_dom, paddr, size, dir, dma_mask);
 }
 
 /*
@@ -2741,12 +2401,41 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 		       enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
 
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return;
 
-	__unmap_single(domain->priv, dma_addr, size, dir);
+	dma_dom = to_dma_ops_domain(domain);
+
+	__unmap_single(dma_dom, dma_addr, size, dir);
+}
+
+static int sg_num_pages(struct device *dev,
+			struct scatterlist *sglist,
+			int nelems)
+{
+	unsigned long mask, boundary_size;
+	struct scatterlist *s;
+	int i, npages = 0;
+
+	mask          = dma_get_seg_boundary(dev);
+	boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
+				   1UL << (BITS_PER_LONG - PAGE_SHIFT);
+
+	for_each_sg(sglist, s, nelems, i) {
+		int p, n;
+
+		s->dma_address = npages << PAGE_SHIFT;
+		p = npages % boundary_size;
+		n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
+		if (p + n > boundary_size)
+			npages += boundary_size - p;
+		npages += n;
+	}
+
+	return npages;
 }
 
 /*
@@ -2754,46 +2443,79 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
  * lists).
  */
 static int map_sg(struct device *dev, struct scatterlist *sglist,
-		  int nelems, enum dma_data_direction dir,
+		  int nelems, enum dma_data_direction direction,
 		  struct dma_attrs *attrs)
 {
+	int mapped_pages = 0, npages = 0, prot = 0, i;
 	struct protection_domain *domain;
-	int i;
+	struct dma_ops_domain *dma_dom;
 	struct scatterlist *s;
-	phys_addr_t paddr;
-	int mapped_elems = 0;
+	unsigned long address;
 	u64 dma_mask;
 
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return 0;
 
+	dma_dom  = to_dma_ops_domain(domain);
 	dma_mask = *dev->dma_mask;
 
+	npages = sg_num_pages(dev, sglist, nelems);
+
+	address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
+	if (address == DMA_ERROR_CODE)
+		goto out_err;
+
+	prot = dir2prot(direction);
+
+	/* Map all sg entries */
 	for_each_sg(sglist, s, nelems, i) {
-		paddr = sg_phys(s);
+		int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
 
-		s->dma_address = __map_single(dev, domain->priv,
-					      paddr, s->length, dir, false,
-					      dma_mask);
+		for (j = 0; j < pages; ++j) {
+			unsigned long bus_addr, phys_addr;
+			int ret;
 
-		if (s->dma_address) {
-			s->dma_length = s->length;
-			mapped_elems++;
-		} else
-			goto unmap;
+			bus_addr  = address + s->dma_address + (j << PAGE_SHIFT);
+			phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
+			ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
+			if (ret)
+				goto out_unmap;
+
+			mapped_pages += 1;
+		}
 	}
 
-	return mapped_elems;
+	/* Everything is mapped - write the right values into s->dma_address */
+	for_each_sg(sglist, s, nelems, i) {
+		s->dma_address += address + s->offset;
+		s->dma_length   = s->length;
+	}
+
+	return nelems;
+
+out_unmap:
+	pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
+	       dev_name(dev), npages);
 
-unmap:
-	for_each_sg(sglist, s, mapped_elems, i) {
-		if (s->dma_address)
-			__unmap_single(domain->priv, s->dma_address,
-				       s->dma_length, dir);
-		s->dma_address = s->dma_length = 0;
+	for_each_sg(sglist, s, nelems, i) {
+		int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
+
+		for (j = 0; j < pages; ++j) {
+			unsigned long bus_addr;
+
+			bus_addr  = address + s->dma_address + (j << PAGE_SHIFT);
+			iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
+
+			if (--mapped_pages)
+				goto out_free_iova;
+		}
 	}
 
+out_free_iova:
+	free_iova_fast(&dma_dom->iovad, address, npages);
+
+out_err:
 	return 0;
 }
 
@@ -2806,18 +2528,19 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		     struct dma_attrs *attrs)
 {
 	struct protection_domain *domain;
-	struct scatterlist *s;
-	int i;
+	struct dma_ops_domain *dma_dom;
+	unsigned long startaddr;
+	int npages = 2;
 
 	domain = get_domain(dev);
 	if (IS_ERR(domain))
 		return;
 
-	for_each_sg(sglist, s, nelems, i) {
-		__unmap_single(domain->priv, s->dma_address,
-			       s->dma_length, dir);
-		s->dma_address = s->dma_length = 0;
-	}
+	startaddr = sg_dma_address(sglist) & PAGE_MASK;
+	dma_dom   = to_dma_ops_domain(domain);
+	npages    = sg_num_pages(dev, sglist, nelems);
+
+	__unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir);
 }
 
 /*
@@ -2829,6 +2552,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 {
 	u64 dma_mask = dev->coherent_dma_mask;
 	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
 	struct page *page;
 
 	domain = get_domain(dev);
@@ -2839,6 +2563,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	} else if (IS_ERR(domain))
 		return NULL;
 
+	dma_dom   = to_dma_ops_domain(domain);
 	size	  = PAGE_ALIGN(size);
 	dma_mask  = dev->coherent_dma_mask;
 	flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -2858,8 +2583,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	*dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
-				 size, DMA_BIDIRECTIONAL, true, dma_mask);
+	*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
+				 size, DMA_BIDIRECTIONAL, dma_mask);
 
 	if (*dma_addr == DMA_ERROR_CODE)
 		goto out_free;
@@ -2882,6 +2607,7 @@ static void free_coherent(struct device *dev, size_t size,
 			  struct dma_attrs *attrs)
 {
 	struct protection_domain *domain;
+	struct dma_ops_domain *dma_dom;
 	struct page *page;
 
 	page = virt_to_page(virt_addr);
@@ -2891,7 +2617,9 @@ static void free_coherent(struct device *dev, size_t size,
 	if (IS_ERR(domain))
 		goto free_mem;
 
-	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
+	dma_dom = to_dma_ops_domain(domain);
+
+	__unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
 
 free_mem:
 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
@@ -2907,48 +2635,92 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
 	return check_device(dev);
 }
 
-static int set_dma_mask(struct device *dev, u64 mask)
+static struct dma_map_ops amd_iommu_dma_ops = {
+	.alloc		= alloc_coherent,
+	.free		= free_coherent,
+	.map_page	= map_page,
+	.unmap_page	= unmap_page,
+	.map_sg		= map_sg,
+	.unmap_sg	= unmap_sg,
+	.dma_supported	= amd_iommu_dma_supported,
+};
+
+static int init_reserved_iova_ranges(void)
 {
-	struct protection_domain *domain;
-	int max_apertures = 1;
+	struct pci_dev *pdev = NULL;
+	struct iova *val;
 
-	domain = get_domain(dev);
-	if (IS_ERR(domain))
-		return PTR_ERR(domain);
+	init_iova_domain(&reserved_iova_ranges, PAGE_SIZE,
+			 IOVA_START_PFN, DMA_32BIT_PFN);
 
-	if (mask == DMA_BIT_MASK(64))
-		max_apertures = 8;
-	else if (mask > DMA_BIT_MASK(32))
-		max_apertures = 4;
+	lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
+			  &reserved_rbtree_key);
+
+	/* MSI memory range */
+	val = reserve_iova(&reserved_iova_ranges,
+			   IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END));
+	if (!val) {
+		pr_err("Reserving MSI range failed\n");
+		return -ENOMEM;
+	}
+
+	/* HT memory range */
+	val = reserve_iova(&reserved_iova_ranges,
+			   IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END));
+	if (!val) {
+		pr_err("Reserving HT range failed\n");
+		return -ENOMEM;
+	}
 
 	/*
-	 * To prevent lock contention it doesn't make sense to allocate more
-	 * apertures than online cpus
+	 * Memory used for PCI resources
+	 * FIXME: Check whether we can reserve the PCI-hole completly
 	 */
-	if (max_apertures > num_online_cpus())
-		max_apertures = num_online_cpus();
+	for_each_pci_dev(pdev) {
+		int i;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; ++i) {
+			struct resource *r = &pdev->resource[i];
+
+			if (!(r->flags & IORESOURCE_MEM))
+				continue;
 
-	if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures))
-		dev_err(dev, "Can't allocate %d iommu apertures\n",
-			max_apertures);
+			val = reserve_iova(&reserved_iova_ranges,
+					   IOVA_PFN(r->start),
+					   IOVA_PFN(r->end));
+			if (!val) {
+				pr_err("Reserve pci-resource range failed\n");
+				return -ENOMEM;
+			}
+		}
+	}
 
 	return 0;
 }
 
-static struct dma_map_ops amd_iommu_dma_ops = {
-	.alloc		= alloc_coherent,
-	.free		= free_coherent,
-	.map_page	= map_page,
-	.unmap_page	= unmap_page,
-	.map_sg		= map_sg,
-	.unmap_sg	= unmap_sg,
-	.dma_supported	= amd_iommu_dma_supported,
-	.set_dma_mask	= set_dma_mask,
-};
-
 int __init amd_iommu_init_api(void)
 {
-	int err = 0;
+	int ret, cpu, err = 0;
+
+	ret = iova_cache_get();
+	if (ret)
+		return ret;
+
+	ret = init_reserved_iova_ranges();
+	if (ret)
+		return ret;
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
+
+		queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
+					 sizeof(*queue->entries),
+					 GFP_KERNEL);
+		if (!queue->entries)
+			goto out_put_iova;
+
+		spin_lock_init(&queue->lock);
+	}
 
 	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
 	if (err)
@@ -2958,11 +2730,26 @@ int __init amd_iommu_init_api(void)
 	if (err)
 		return err;
 #endif
+	err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
+	if (err)
+		return err;
 	return 0;
+
+out_put_iova:
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
+
+		kfree(queue->entries);
+	}
+
+	return -ENOMEM;
 }
 
 int __init amd_iommu_init_dma_ops(void)
 {
+	setup_timer(&queue_timer, queue_flush_timeout, 0);
+	atomic_set(&queue_timer_on, 0);
+
 	swiotlb        = iommu_pass_through ? 1 : 0;
 	iommu_detected = 1;
 
@@ -2981,6 +2768,7 @@ int __init amd_iommu_init_dma_ops(void)
 		pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
 
 	return 0;
+
 }
 
 /*****************************************************************************
@@ -3103,9 +2891,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 static void amd_iommu_domain_free(struct iommu_domain *dom)
 {
 	struct protection_domain *domain;
-
-	if (!dom)
-		return;
+	struct dma_ops_domain *dma_dom;
 
 	domain = to_pdomain(dom);
 
@@ -3114,13 +2900,31 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 
 	BUG_ON(domain->dev_cnt != 0);
 
-	if (domain->mode != PAGE_MODE_NONE)
-		free_pagetable(domain);
+	if (!dom)
+		return;
+
+	switch (dom->type) {
+	case IOMMU_DOMAIN_DMA:
+		/*
+		 * First make sure the domain is no longer referenced from the
+		 * flush queue
+		 */
+		queue_flush_all();
 
-	if (domain->flags & PD_IOMMUV2_MASK)
-		free_gcr3_table(domain);
+		/* Now release the domain */
+		dma_dom = to_dma_ops_domain(domain);
+		dma_ops_domain_free(dma_dom);
+		break;
+	default:
+		if (domain->mode != PAGE_MODE_NONE)
+			free_pagetable(domain);
+
+		if (domain->flags & PD_IOMMUV2_MASK)
+			free_gcr3_table(domain);
 
-	protection_domain_free(domain);
+		protection_domain_free(domain);
+		break;
+	}
 }
 
 static void amd_iommu_detach_device(struct iommu_domain *dom,
@@ -3190,7 +2994,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 		prot |= IOMMU_PROT_IW;
 
 	mutex_lock(&domain->api_lock);
-	ret = iommu_map_page(domain, iova, paddr, prot, page_size);
+	ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL);
 	mutex_unlock(&domain->api_lock);
 
 	return ret;
@@ -3292,6 +3096,19 @@ static void amd_iommu_put_dm_regions(struct device *dev,
 		kfree(entry);
 }
 
+static void amd_iommu_apply_dm_region(struct device *dev,
+				      struct iommu_domain *domain,
+				      struct iommu_dm_region *region)
+{
+	struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain));
+	unsigned long start, end;
+
+	start = IOVA_PFN(region->start);
+	end   = IOVA_PFN(region->start + region->length);
+
+	WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
+}
+
 static const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -3307,6 +3124,7 @@ static const struct iommu_ops amd_iommu_ops = {
 	.device_group = amd_iommu_device_group,
 	.get_dm_regions = amd_iommu_get_dm_regions,
 	.put_dm_regions = amd_iommu_put_dm_regions,
+	.apply_dm_region = amd_iommu_apply_dm_region,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 };
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 590956ac704e..caf5e3822715 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -421,7 +421,6 @@ struct protection_domain {
 	bool updated;		/* complete domain flush required */
 	unsigned dev_cnt;	/* devices assigned to this domain */
 	unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
-	void *priv;             /* private data */
 };
 
 /*
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 56999d2fac07..0d52ceb0846d 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -961,7 +961,7 @@ static int __init amd_iommu_v2_init(void)
 	spin_lock_init(&state_lock);
 
 	ret = -ENOMEM;
-	iommu_wq = create_workqueue("amd_iommu_v2");
+	iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
 	if (iommu_wq == NULL)
 		goto out;
 
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 6a86b5d1defa..1344e29acd86 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -241,8 +241,20 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
 		if (!dmar_match_pci_path(info, scope->bus, path, level))
 			continue;
 
-		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^
-		    (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) {
+		/*
+		 * We expect devices with endpoint scope to have normal PCI
+		 * headers, and devices with bridge scope to have bridge PCI
+		 * headers.  However PCI NTB devices may be listed in the
+		 * DMAR table with bridge scope, even though they have a
+		 * normal PCI header.  NTB devices are identified by class
+		 * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
+		 * for this special case.
+		 */
+		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
+		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
+		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
+		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+		      info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) {
 			pr_warn("Device scope type does not match for %s\n",
 				pci_name(info->dev));
 			return -EINVAL;
@@ -1155,8 +1167,6 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 				(unsigned long long)qi->desc[index].high);
 			memcpy(&qi->desc[index], &qi->desc[wait_index],
 					sizeof(struct qi_desc));
-			__iommu_flush_cache(iommu, &qi->desc[index],
-					sizeof(struct qi_desc));
 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
 			return -EINVAL;
 		}
@@ -1231,9 +1241,6 @@ restart:
 
 	hw[wait_index] = wait_desc;
 
-	__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
-	__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
-
 	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 	qi->free_cnt -= 2;
 
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 5ecc86cb74c8..33dcc29ec200 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -54,6 +54,10 @@ typedef u32 sysmmu_pte_t;
 #define lv2ent_small(pent) ((*(pent) & 2) == 2)
 #define lv2ent_large(pent) ((*(pent) & 3) == 1)
 
+#ifdef CONFIG_BIG_ENDIAN
+#warning "revisit driver if we can enable big-endian ptes"
+#endif
+
 /*
  * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces
  * v5.0 introduced support for 36bit physical address space by shifting
@@ -322,14 +326,27 @@ static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd)
 	__sysmmu_tlb_invalidate(data);
 }
 
+static void __sysmmu_enable_clocks(struct sysmmu_drvdata *data)
+{
+	BUG_ON(clk_prepare_enable(data->clk_master));
+	BUG_ON(clk_prepare_enable(data->clk));
+	BUG_ON(clk_prepare_enable(data->pclk));
+	BUG_ON(clk_prepare_enable(data->aclk));
+}
+
+static void __sysmmu_disable_clocks(struct sysmmu_drvdata *data)
+{
+	clk_disable_unprepare(data->aclk);
+	clk_disable_unprepare(data->pclk);
+	clk_disable_unprepare(data->clk);
+	clk_disable_unprepare(data->clk_master);
+}
+
 static void __sysmmu_get_version(struct sysmmu_drvdata *data)
 {
 	u32 ver;
 
-	clk_enable(data->clk_master);
-	clk_enable(data->clk);
-	clk_enable(data->pclk);
-	clk_enable(data->aclk);
+	__sysmmu_enable_clocks(data);
 
 	ver = readl(data->sfrbase + REG_MMU_VERSION);
 
@@ -342,10 +359,7 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data)
 	dev_dbg(data->sysmmu, "hardware version: %d.%d\n",
 		MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version));
 
-	clk_disable(data->aclk);
-	clk_disable(data->pclk);
-	clk_disable(data->clk);
-	clk_disable(data->clk_master);
+	__sysmmu_disable_clocks(data);
 }
 
 static void show_fault_information(struct sysmmu_drvdata *data,
@@ -427,10 +441,7 @@ static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
 	writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
 	writel(0, data->sfrbase + REG_MMU_CFG);
 
-	clk_disable(data->aclk);
-	clk_disable(data->pclk);
-	clk_disable(data->clk);
-	clk_disable(data->clk_master);
+	__sysmmu_disable_clocks(data);
 }
 
 static bool __sysmmu_disable(struct sysmmu_drvdata *data)
@@ -475,10 +486,7 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data)
 
 static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
 {
-	clk_enable(data->clk_master);
-	clk_enable(data->clk);
-	clk_enable(data->pclk);
-	clk_enable(data->aclk);
+	__sysmmu_enable_clocks(data);
 
 	writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
 
@@ -488,6 +496,12 @@ static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
 
 	writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
 
+	/*
+	 * SYSMMU driver keeps master's clock enabled only for the short
+	 * time, while accessing the registers. For performing address
+	 * translation during DMA transaction it relies on the client
+	 * driver to enable it.
+	 */
 	clk_disable(data->clk_master);
 }
 
@@ -524,16 +538,15 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
 {
 	unsigned long flags;
 
-	clk_enable(data->clk_master);
 
 	spin_lock_irqsave(&data->lock, flags);
-	if (is_sysmmu_active(data)) {
-		if (data->version >= MAKE_MMU_VER(3, 3))
-			__sysmmu_tlb_invalidate_entry(data, iova, 1);
+	if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) {
+		clk_enable(data->clk_master);
+		__sysmmu_tlb_invalidate_entry(data, iova, 1);
+		clk_disable(data->clk_master);
 	}
 	spin_unlock_irqrestore(&data->lock, flags);
 
-	clk_disable(data->clk_master);
 }
 
 static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
@@ -572,6 +585,8 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 	spin_unlock_irqrestore(&data->lock, flags);
 }
 
+static struct iommu_ops exynos_iommu_ops;
+
 static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 {
 	int irq, ret;
@@ -602,37 +617,22 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 	}
 
 	data->clk = devm_clk_get(dev, "sysmmu");
-	if (!IS_ERR(data->clk)) {
-		ret = clk_prepare(data->clk);
-		if (ret) {
-			dev_err(dev, "Failed to prepare clk\n");
-			return ret;
-		}
-	} else {
+	if (PTR_ERR(data->clk) == -ENOENT)
 		data->clk = NULL;
-	}
+	else if (IS_ERR(data->clk))
+		return PTR_ERR(data->clk);
 
 	data->aclk = devm_clk_get(dev, "aclk");
-	if (!IS_ERR(data->aclk)) {
-		ret = clk_prepare(data->aclk);
-		if (ret) {
-			dev_err(dev, "Failed to prepare aclk\n");
-			return ret;
-		}
-	} else {
+	if (PTR_ERR(data->aclk) == -ENOENT)
 		data->aclk = NULL;
-	}
+	else if (IS_ERR(data->aclk))
+		return PTR_ERR(data->aclk);
 
 	data->pclk = devm_clk_get(dev, "pclk");
-	if (!IS_ERR(data->pclk)) {
-		ret = clk_prepare(data->pclk);
-		if (ret) {
-			dev_err(dev, "Failed to prepare pclk\n");
-			return ret;
-		}
-	} else {
+	if (PTR_ERR(data->pclk) == -ENOENT)
 		data->pclk = NULL;
-	}
+	else if (IS_ERR(data->pclk))
+		return PTR_ERR(data->pclk);
 
 	if (!data->clk && (!data->aclk || !data->pclk)) {
 		dev_err(dev, "Failed to get device clock(s)!\n");
@@ -640,15 +640,10 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 	}
 
 	data->clk_master = devm_clk_get(dev, "master");
-	if (!IS_ERR(data->clk_master)) {
-		ret = clk_prepare(data->clk_master);
-		if (ret) {
-			dev_err(dev, "Failed to prepare master's clk\n");
-			return ret;
-		}
-	} else {
+	if (PTR_ERR(data->clk_master) == -ENOENT)
 		data->clk_master = NULL;
-	}
+	else if (IS_ERR(data->clk_master))
+		return PTR_ERR(data->clk_master);
 
 	data->sysmmu = dev;
 	spin_lock_init(&data->lock);
@@ -665,6 +660,8 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 
+	of_iommu_set_ops(dev->of_node, &exynos_iommu_ops);
+
 	return 0;
 }
 
@@ -709,6 +706,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = {
 		.name		= "exynos-sysmmu",
 		.of_match_table	= sysmmu_of_match,
 		.pm		= &sysmmu_pm_ops,
+		.suppress_bind_attrs = true,
 	}
 };
 
@@ -716,7 +714,7 @@ static inline void update_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
 {
 	dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent), sizeof(*ent),
 				DMA_TO_DEVICE);
-	*ent = val;
+	*ent = cpu_to_le32(val);
 	dma_sync_single_for_device(dma_dev, virt_to_phys(ent), sizeof(*ent),
 				   DMA_TO_DEVICE);
 }
@@ -1357,7 +1355,6 @@ static int __init exynos_iommu_of_setup(struct device_node *np)
 	if (!dma_dev)
 		dma_dev = &pdev->dev;
 
-	of_iommu_set_ops(np, &exynos_iommu_ops);
 	return 0;
 }
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 323dac9900ba..9d2938e12aa3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1672,7 +1672,7 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 		return -ENOMEM;
 	}
 
-	size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
+	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
 	iommu->domains = kzalloc(size, GFP_KERNEL);
 
 	if (iommu->domains) {
@@ -1737,7 +1737,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
 static void free_dmar_iommu(struct intel_iommu *iommu)
 {
 	if ((iommu->domains) && (iommu->domain_ids)) {
-		int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
+		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
 		int i;
 
 		for (i = 0; i < elems; i++)
@@ -2076,7 +2076,7 @@ out_unlock:
 	spin_unlock(&iommu->lock);
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 struct domain_context_mapping_data {
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3000051f48b4..b06d93594436 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -34,8 +34,7 @@
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
-static struct ida iommu_group_ida;
-static struct mutex iommu_group_mutex;
+static DEFINE_IDA(iommu_group_ida);
 
 struct iommu_callback_data {
 	const struct iommu_ops *ops;
@@ -144,9 +143,7 @@ static void iommu_group_release(struct kobject *kobj)
 	if (group->iommu_data_release)
 		group->iommu_data_release(group->iommu_data);
 
-	mutex_lock(&iommu_group_mutex);
-	ida_remove(&iommu_group_ida, group->id);
-	mutex_unlock(&iommu_group_mutex);
+	ida_simple_remove(&iommu_group_ida, group->id);
 
 	if (group->default_domain)
 		iommu_domain_free(group->default_domain);
@@ -186,26 +183,17 @@ struct iommu_group *iommu_group_alloc(void)
 	INIT_LIST_HEAD(&group->devices);
 	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 
-	mutex_lock(&iommu_group_mutex);
-
-again:
-	if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
+	ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0) {
 		kfree(group);
-		mutex_unlock(&iommu_group_mutex);
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(ret);
 	}
-
-	if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
-		goto again;
-
-	mutex_unlock(&iommu_group_mutex);
+	group->id = ret;
 
 	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
 				   NULL, "%d", group->id);
 	if (ret) {
-		mutex_lock(&iommu_group_mutex);
-		ida_remove(&iommu_group_ida, group->id);
-		mutex_unlock(&iommu_group_mutex);
+		ida_simple_remove(&iommu_group_ida, group->id);
 		kfree(group);
 		return ERR_PTR(ret);
 	}
@@ -348,6 +336,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group,
 	list_for_each_entry(entry, &mappings, list) {
 		dma_addr_t start, end, addr;
 
+		if (domain->ops->apply_dm_region)
+			domain->ops->apply_dm_region(dev, domain, entry);
+
 		start = ALIGN(entry->start, pg_size);
 		end   = ALIGN(entry->start + entry->length, pg_size);
 
@@ -1483,9 +1474,6 @@ static int __init iommu_init(void)
 {
 	iommu_group_kset = kset_create_and_add("iommu_groups",
 					       NULL, kernel_kobj);
-	ida_init(&iommu_group_ida);
-	mutex_init(&iommu_group_mutex);
-
 	BUG_ON(!iommu_group_kset);
 
 	return 0;
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index e321fa517a45..b09692bb5b0a 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -27,32 +27,35 @@
 #include <linux/slab.h>
 #include <linux/iommu.h>
 #include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/of_iommu.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sizes.h>
 
 #include "msm_iommu_hw-8xxx.h"
 #include "msm_iommu.h"
+#include "io-pgtable.h"
 
 #define MRC(reg, processor, op1, crn, crm, op2)				\
 __asm__ __volatile__ (							\
 "   mrc   "   #processor "," #op1 ", %0,"  #crn "," #crm "," #op2 "\n"  \
 : "=r" (reg))
 
-#define RCP15_PRRR(reg)		MRC(reg, p15, 0, c10, c2, 0)
-#define RCP15_NMRR(reg)		MRC(reg, p15, 0, c10, c2, 1)
-
 /* bitmap of the page sizes currently supported */
 #define MSM_IOMMU_PGSIZES	(SZ_4K | SZ_64K | SZ_1M | SZ_16M)
 
-static int msm_iommu_tex_class[4];
-
 DEFINE_SPINLOCK(msm_iommu_lock);
+static LIST_HEAD(qcom_iommu_devices);
+static struct iommu_ops msm_iommu_ops;
 
 struct msm_priv {
-	unsigned long *pgtable;
 	struct list_head list_attached;
 	struct iommu_domain domain;
+	struct io_pgtable_cfg	cfg;
+	struct io_pgtable_ops	*iop;
+	struct device		*dev;
+	spinlock_t		pgtlock; /* pagetable lock */
 };
 
 static struct msm_priv *to_msm_priv(struct iommu_domain *dom)
@@ -60,67 +63,183 @@ static struct msm_priv *to_msm_priv(struct iommu_domain *dom)
 	return container_of(dom, struct msm_priv, domain);
 }
 
-static int __enable_clocks(struct msm_iommu_drvdata *drvdata)
+static int __enable_clocks(struct msm_iommu_dev *iommu)
 {
 	int ret;
 
-	ret = clk_enable(drvdata->pclk);
+	ret = clk_enable(iommu->pclk);
 	if (ret)
 		goto fail;
 
-	if (drvdata->clk) {
-		ret = clk_enable(drvdata->clk);
+	if (iommu->clk) {
+		ret = clk_enable(iommu->clk);
 		if (ret)
-			clk_disable(drvdata->pclk);
+			clk_disable(iommu->pclk);
 	}
 fail:
 	return ret;
 }
 
-static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
+static void __disable_clocks(struct msm_iommu_dev *iommu)
 {
-	clk_disable(drvdata->clk);
-	clk_disable(drvdata->pclk);
+	if (iommu->clk)
+		clk_disable(iommu->clk);
+	clk_disable(iommu->pclk);
 }
 
-static int __flush_iotlb(struct iommu_domain *domain)
+static void msm_iommu_reset(void __iomem *base, int ncb)
 {
-	struct msm_priv *priv = to_msm_priv(domain);
-	struct msm_iommu_drvdata *iommu_drvdata;
-	struct msm_iommu_ctx_drvdata *ctx_drvdata;
-	int ret = 0;
-#ifndef CONFIG_IOMMU_PGTABLES_L2
-	unsigned long *fl_table = priv->pgtable;
-	int i;
-
-	if (!list_empty(&priv->list_attached)) {
-		dmac_flush_range(fl_table, fl_table + SZ_16K);
-
-		for (i = 0; i < NUM_FL_PTE; i++)
-			if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
-				void *sl_table = __va(fl_table[i] &
-								FL_BASE_MASK);
-				dmac_flush_range(sl_table, sl_table + SZ_4K);
-			}
+	int ctx;
+
+	SET_RPUE(base, 0);
+	SET_RPUEIE(base, 0);
+	SET_ESRRESTORE(base, 0);
+	SET_TBE(base, 0);
+	SET_CR(base, 0);
+	SET_SPDMBE(base, 0);
+	SET_TESTBUSCR(base, 0);
+	SET_TLBRSW(base, 0);
+	SET_GLOBAL_TLBIALL(base, 0);
+	SET_RPU_ACR(base, 0);
+	SET_TLBLKCRWE(base, 1);
+
+	for (ctx = 0; ctx < ncb; ctx++) {
+		SET_BPRCOSH(base, ctx, 0);
+		SET_BPRCISH(base, ctx, 0);
+		SET_BPRCNSH(base, ctx, 0);
+		SET_BPSHCFG(base, ctx, 0);
+		SET_BPMTCFG(base, ctx, 0);
+		SET_ACTLR(base, ctx, 0);
+		SET_SCTLR(base, ctx, 0);
+		SET_FSRRESTORE(base, ctx, 0);
+		SET_TTBR0(base, ctx, 0);
+		SET_TTBR1(base, ctx, 0);
+		SET_TTBCR(base, ctx, 0);
+		SET_BFBCR(base, ctx, 0);
+		SET_PAR(base, ctx, 0);
+		SET_FAR(base, ctx, 0);
+		SET_CTX_TLBIALL(base, ctx, 0);
+		SET_TLBFLPTER(base, ctx, 0);
+		SET_TLBSLPTER(base, ctx, 0);
+		SET_TLBLKCR(base, ctx, 0);
+		SET_CONTEXTIDR(base, ctx, 0);
 	}
-#endif
+}
 
-	list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
+static void __flush_iotlb(void *cookie)
+{
+	struct msm_priv *priv = cookie;
+	struct msm_iommu_dev *iommu = NULL;
+	struct msm_iommu_ctx_dev *master;
+	int ret = 0;
+
+	list_for_each_entry(iommu, &priv->list_attached, dom_node) {
+		ret = __enable_clocks(iommu);
+		if (ret)
+			goto fail;
 
-		BUG_ON(!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent);
+		list_for_each_entry(master, &iommu->ctx_list, list)
+			SET_CTX_TLBIALL(iommu->base, master->num, 0);
 
-		iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
-		BUG_ON(!iommu_drvdata);
+		__disable_clocks(iommu);
+	}
+fail:
+	return;
+}
 
-		ret = __enable_clocks(iommu_drvdata);
+static void __flush_iotlb_range(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
+{
+	struct msm_priv *priv = cookie;
+	struct msm_iommu_dev *iommu = NULL;
+	struct msm_iommu_ctx_dev *master;
+	int ret = 0;
+	int temp_size;
+
+	list_for_each_entry(iommu, &priv->list_attached, dom_node) {
+		ret = __enable_clocks(iommu);
 		if (ret)
 			goto fail;
 
-		SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
-		__disable_clocks(iommu_drvdata);
+		list_for_each_entry(master, &iommu->ctx_list, list) {
+			temp_size = size;
+			do {
+				iova &= TLBIVA_VA;
+				iova |= GET_CONTEXTIDR_ASID(iommu->base,
+							    master->num);
+				SET_TLBIVA(iommu->base, master->num, iova);
+				iova += granule;
+			} while (temp_size -= granule);
+		}
+
+		__disable_clocks(iommu);
 	}
+
 fail:
-	return ret;
+	return;
+}
+
+static void __flush_iotlb_sync(void *cookie)
+{
+	/*
+	 * Nothing is needed here, the barrier to guarantee
+	 * completion of the tlb sync operation is implicitly
+	 * taken care when the iommu client does a writel before
+	 * kick starting the other master.
+	 */
+}
+
+static const struct iommu_gather_ops msm_iommu_gather_ops = {
+	.tlb_flush_all = __flush_iotlb,
+	.tlb_add_flush = __flush_iotlb_range,
+	.tlb_sync = __flush_iotlb_sync,
+};
+
+static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
+{
+	int idx;
+
+	do {
+		idx = find_next_zero_bit(map, end, start);
+		if (idx == end)
+			return -ENOSPC;
+	} while (test_and_set_bit(idx, map));
+
+	return idx;
+}
+
+static void msm_iommu_free_ctx(unsigned long *map, int idx)
+{
+	clear_bit(idx, map);
+}
+
+static void config_mids(struct msm_iommu_dev *iommu,
+			struct msm_iommu_ctx_dev *master)
+{
+	int mid, ctx, i;
+
+	for (i = 0; i < master->num_mids; i++) {
+		mid = master->mids[i];
+		ctx = master->num;
+
+		SET_M2VCBR_N(iommu->base, mid, 0);
+		SET_CBACR_N(iommu->base, ctx, 0);
+
+		/* Set VMID = 0 */
+		SET_VMID(iommu->base, mid, 0);
+
+		/* Set the context number for that MID to this context */
+		SET_CBNDX(iommu->base, mid, ctx);
+
+		/* Set MID associated with this context bank to 0*/
+		SET_CBVMID(iommu->base, ctx, 0);
+
+		/* Set the ASID for TLB tagging for this context */
+		SET_CONTEXTIDR_ASID(iommu->base, ctx, ctx);
+
+		/* Set security bit override to be Non-secure */
+		SET_NSCFG(iommu->base, mid, 3);
+	}
 }
 
 static void __reset_context(void __iomem *base, int ctx)
@@ -143,15 +262,17 @@ static void __reset_context(void __iomem *base, int ctx)
 	SET_TLBFLPTER(base, ctx, 0);
 	SET_TLBSLPTER(base, ctx, 0);
 	SET_TLBLKCR(base, ctx, 0);
-	SET_PRRR(base, ctx, 0);
-	SET_NMRR(base, ctx, 0);
 }
 
-static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
+static void __program_context(void __iomem *base, int ctx,
+			      struct msm_priv *priv)
 {
-	unsigned int prrr, nmrr;
 	__reset_context(base, ctx);
 
+	/* Turn on TEX Remap */
+	SET_TRE(base, ctx, 1);
+	SET_AFE(base, ctx, 1);
+
 	/* Set up HTW mode */
 	/* TLB miss configuration: perform HTW on miss */
 	SET_TLBMCFG(base, ctx, 0x3);
@@ -159,8 +280,13 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
 	/* V2P configuration: HTW for access */
 	SET_V2PCFG(base, ctx, 0x3);
 
-	SET_TTBCR(base, ctx, 0);
-	SET_TTBR0_PA(base, ctx, (pgtable >> 14));
+	SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr);
+	SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]);
+	SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]);
+
+	/* Set prrr and nmrr */
+	SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr);
+	SET_NMRR(base, ctx, priv->cfg.arm_v7s_cfg.nmrr);
 
 	/* Invalidate the TLB for this context */
 	SET_CTX_TLBIALL(base, ctx, 0);
@@ -179,38 +305,9 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
 	SET_RCOSH(base, ctx, 1);
 	SET_RCNSH(base, ctx, 1);
 
-	/* Turn on TEX Remap */
-	SET_TRE(base, ctx, 1);
-
-	/* Set TEX remap attributes */
-	RCP15_PRRR(prrr);
-	RCP15_NMRR(nmrr);
-	SET_PRRR(base, ctx, prrr);
-	SET_NMRR(base, ctx, nmrr);
-
 	/* Turn on BFB prefetch */
 	SET_BFBDFE(base, ctx, 1);
 
-#ifdef CONFIG_IOMMU_PGTABLES_L2
-	/* Configure page tables as inner-cacheable and shareable to reduce
-	 * the TLB miss penalty.
-	 */
-	SET_TTBR0_SH(base, ctx, 1);
-	SET_TTBR1_SH(base, ctx, 1);
-
-	SET_TTBR0_NOS(base, ctx, 1);
-	SET_TTBR1_NOS(base, ctx, 1);
-
-	SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */
-	SET_TTBR0_IRGNL(base, ctx, 1);
-
-	SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */
-	SET_TTBR1_IRGNL(base, ctx, 1);
-
-	SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */
-	SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */
-#endif
-
 	/* Enable the MMU */
 	SET_M(base, ctx, 1);
 }
@@ -227,13 +324,6 @@ static struct iommu_domain *msm_iommu_domain_alloc(unsigned type)
 		goto fail_nomem;
 
 	INIT_LIST_HEAD(&priv->list_attached);
-	priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL,
-							  get_order(SZ_16K));
-
-	if (!priv->pgtable)
-		goto fail_nomem;
-
-	memset(priv->pgtable, 0, SZ_16K);
 
 	priv->domain.geometry.aperture_start = 0;
 	priv->domain.geometry.aperture_end   = (1ULL << 32) - 1;
@@ -250,304 +340,137 @@ static void msm_iommu_domain_free(struct iommu_domain *domain)
 {
 	struct msm_priv *priv;
 	unsigned long flags;
-	unsigned long *fl_table;
-	int i;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
 	priv = to_msm_priv(domain);
+	kfree(priv);
+	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+}
 
-	fl_table = priv->pgtable;
-
-	for (i = 0; i < NUM_FL_PTE; i++)
-		if ((fl_table[i] & 0x03) == FL_TYPE_TABLE)
-			free_page((unsigned long) __va(((fl_table[i]) &
-							FL_BASE_MASK)));
+static int msm_iommu_domain_config(struct msm_priv *priv)
+{
+	spin_lock_init(&priv->pgtlock);
+
+	priv->cfg = (struct io_pgtable_cfg) {
+		.quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+		.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
+		.ias = 32,
+		.oas = 32,
+		.tlb = &msm_iommu_gather_ops,
+		.iommu_dev = priv->dev,
+	};
+
+	priv->iop = alloc_io_pgtable_ops(ARM_V7S, &priv->cfg, priv);
+	if (!priv->iop) {
+		dev_err(priv->dev, "Failed to allocate pgtable\n");
+		return -EINVAL;
+	}
 
-	free_pages((unsigned long)priv->pgtable, get_order(SZ_16K));
-	priv->pgtable = NULL;
+	msm_iommu_ops.pgsize_bitmap = priv->cfg.pgsize_bitmap;
 
-	kfree(priv);
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+	return 0;
 }
 
 static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
-	struct msm_priv *priv;
-	struct msm_iommu_ctx_dev *ctx_dev;
-	struct msm_iommu_drvdata *iommu_drvdata;
-	struct msm_iommu_ctx_drvdata *ctx_drvdata;
-	struct msm_iommu_ctx_drvdata *tmp_drvdata;
 	int ret = 0;
 	unsigned long flags;
+	struct msm_iommu_dev *iommu;
+	struct msm_priv *priv = to_msm_priv(domain);
+	struct msm_iommu_ctx_dev *master;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
-
-	priv = to_msm_priv(domain);
-
-	if (!dev) {
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	iommu_drvdata = dev_get_drvdata(dev->parent);
-	ctx_drvdata = dev_get_drvdata(dev);
-	ctx_dev = dev->platform_data;
-
-	if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) {
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	if (!list_empty(&ctx_drvdata->attached_elm)) {
-		ret = -EBUSY;
-		goto fail;
-	}
+	priv->dev = dev;
+	msm_iommu_domain_config(priv);
 
-	list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm)
-		if (tmp_drvdata == ctx_drvdata) {
-			ret = -EBUSY;
-			goto fail;
+	spin_lock_irqsave(&msm_iommu_lock, flags);
+	list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) {
+		master = list_first_entry(&iommu->ctx_list,
+					  struct msm_iommu_ctx_dev,
+					  list);
+		if (master->of_node == dev->of_node) {
+			ret = __enable_clocks(iommu);
+			if (ret)
+				goto fail;
+
+			list_for_each_entry(master, &iommu->ctx_list, list) {
+				if (master->num) {
+					dev_err(dev, "domain already attached");
+					ret = -EEXIST;
+					goto fail;
+				}
+				master->num =
+					msm_iommu_alloc_ctx(iommu->context_map,
+							    0, iommu->ncb);
+					if (IS_ERR_VALUE(master->num)) {
+						ret = -ENODEV;
+						goto fail;
+					}
+				config_mids(iommu, master);
+				__program_context(iommu->base, master->num,
+						  priv);
+			}
+			__disable_clocks(iommu);
+			list_add(&iommu->dom_node, &priv->list_attached);
 		}
-
-	ret = __enable_clocks(iommu_drvdata);
-	if (ret)
-		goto fail;
-
-	__program_context(iommu_drvdata->base, ctx_dev->num,
-			  __pa(priv->pgtable));
-
-	__disable_clocks(iommu_drvdata);
-	list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
-	ret = __flush_iotlb(domain);
+	}
 
 fail:
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+
 	return ret;
 }
 
 static void msm_iommu_detach_dev(struct iommu_domain *domain,
 				 struct device *dev)
 {
-	struct msm_priv *priv;
-	struct msm_iommu_ctx_dev *ctx_dev;
-	struct msm_iommu_drvdata *iommu_drvdata;
-	struct msm_iommu_ctx_drvdata *ctx_drvdata;
+	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
+	struct msm_iommu_dev *iommu;
+	struct msm_iommu_ctx_dev *master;
 	int ret;
 
-	spin_lock_irqsave(&msm_iommu_lock, flags);
-	priv = to_msm_priv(domain);
-
-	if (!dev)
-		goto fail;
-
-	iommu_drvdata = dev_get_drvdata(dev->parent);
-	ctx_drvdata = dev_get_drvdata(dev);
-	ctx_dev = dev->platform_data;
-
-	if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
-		goto fail;
-
-	ret = __flush_iotlb(domain);
-	if (ret)
-		goto fail;
-
-	ret = __enable_clocks(iommu_drvdata);
-	if (ret)
-		goto fail;
+	free_io_pgtable_ops(priv->iop);
 
-	__reset_context(iommu_drvdata->base, ctx_dev->num);
-	__disable_clocks(iommu_drvdata);
-	list_del_init(&ctx_drvdata->attached_elm);
+	spin_lock_irqsave(&msm_iommu_lock, flags);
+	list_for_each_entry(iommu, &priv->list_attached, dom_node) {
+		ret = __enable_clocks(iommu);
+		if (ret)
+			goto fail;
 
+		list_for_each_entry(master, &iommu->ctx_list, list) {
+			msm_iommu_free_ctx(iommu->context_map, master->num);
+			__reset_context(iommu->base, master->num);
+		}
+		__disable_clocks(iommu);
+	}
 fail:
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
 }
 
-static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
+static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
 			 phys_addr_t pa, size_t len, int prot)
 {
-	struct msm_priv *priv;
+	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
-	unsigned long *fl_table;
-	unsigned long *fl_pte;
-	unsigned long fl_offset;
-	unsigned long *sl_table;
-	unsigned long *sl_pte;
-	unsigned long sl_offset;
-	unsigned int pgprot;
-	int ret = 0, tex, sh;
-
-	spin_lock_irqsave(&msm_iommu_lock, flags);
-
-	sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
-	tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
-
-	if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	priv = to_msm_priv(domain);
-
-	fl_table = priv->pgtable;
-
-	if (len != SZ_16M && len != SZ_1M &&
-	    len != SZ_64K && len != SZ_4K) {
-		pr_debug("Bad size: %d\n", len);
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	if (!fl_table) {
-		pr_debug("Null page table\n");
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	if (len == SZ_16M || len == SZ_1M) {
-		pgprot = sh ? FL_SHARED : 0;
-		pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
-		pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
-		pgprot |= tex & 0x04 ? FL_TEX0 : 0;
-	} else	{
-		pgprot = sh ? SL_SHARED : 0;
-		pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
-		pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
-		pgprot |= tex & 0x04 ? SL_TEX0 : 0;
-	}
-
-	fl_offset = FL_OFFSET(va);	/* Upper 12 bits */
-	fl_pte = fl_table + fl_offset;	/* int pointers, 4 bytes */
-
-	if (len == SZ_16M) {
-		int i = 0;
-		for (i = 0; i < 16; i++)
-			*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
-				  FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
-				  FL_SHARED | FL_NG | pgprot;
-	}
-
-	if (len == SZ_1M)
-		*fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG |
-					    FL_TYPE_SECT | FL_SHARED | pgprot;
-
-	/* Need a 2nd level table */
-	if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
-		unsigned long *sl;
-		sl = (unsigned long *) __get_free_pages(GFP_ATOMIC,
-							get_order(SZ_4K));
-
-		if (!sl) {
-			pr_debug("Could not allocate second level table\n");
-			ret = -ENOMEM;
-			goto fail;
-		}
-
-		memset(sl, 0, SZ_4K);
-		*fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE);
-	}
-
-	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
-	sl_offset = SL_OFFSET(va);
-	sl_pte = sl_table + sl_offset;
-
-
-	if (len == SZ_4K)
-		*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG |
-					  SL_SHARED | SL_TYPE_SMALL | pgprot;
-
-	if (len == SZ_64K) {
-		int i;
+	int ret;
 
-		for (i = 0; i < 16; i++)
-			*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
-			    SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
-	}
+	spin_lock_irqsave(&priv->pgtlock, flags);
+	ret = priv->iop->map(priv->iop, iova, pa, len, prot);
+	spin_unlock_irqrestore(&priv->pgtlock, flags);
 
-	ret = __flush_iotlb(domain);
-fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
 	return ret;
 }
 
-static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
-			    size_t len)
+static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+			      size_t len)
 {
-	struct msm_priv *priv;
+	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
-	unsigned long *fl_table;
-	unsigned long *fl_pte;
-	unsigned long fl_offset;
-	unsigned long *sl_table;
-	unsigned long *sl_pte;
-	unsigned long sl_offset;
-	int i, ret = 0;
-
-	spin_lock_irqsave(&msm_iommu_lock, flags);
-
-	priv = to_msm_priv(domain);
 
-	fl_table = priv->pgtable;
+	spin_lock_irqsave(&priv->pgtlock, flags);
+	len = priv->iop->unmap(priv->iop, iova, len);
+	spin_unlock_irqrestore(&priv->pgtlock, flags);
 
-	if (len != SZ_16M && len != SZ_1M &&
-	    len != SZ_64K && len != SZ_4K) {
-		pr_debug("Bad length: %d\n", len);
-		goto fail;
-	}
-
-	if (!fl_table) {
-		pr_debug("Null page table\n");
-		goto fail;
-	}
-
-	fl_offset = FL_OFFSET(va);	/* Upper 12 bits */
-	fl_pte = fl_table + fl_offset;	/* int pointers, 4 bytes */
-
-	if (*fl_pte == 0) {
-		pr_debug("First level PTE is 0\n");
-		goto fail;
-	}
-
-	/* Unmap supersection */
-	if (len == SZ_16M)
-		for (i = 0; i < 16; i++)
-			*(fl_pte+i) = 0;
-
-	if (len == SZ_1M)
-		*fl_pte = 0;
-
-	sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK));
-	sl_offset = SL_OFFSET(va);
-	sl_pte = sl_table + sl_offset;
-
-	if (len == SZ_64K) {
-		for (i = 0; i < 16; i++)
-			*(sl_pte+i) = 0;
-	}
-
-	if (len == SZ_4K)
-		*sl_pte = 0;
-
-	if (len == SZ_4K || len == SZ_64K) {
-		int used = 0;
-
-		for (i = 0; i < NUM_SL_PTE; i++)
-			if (sl_table[i])
-				used = 1;
-		if (!used) {
-			free_page((unsigned long)sl_table);
-			*fl_pte = 0;
-		}
-	}
-
-	ret = __flush_iotlb(domain);
-
-fail:
-	spin_unlock_irqrestore(&msm_iommu_lock, flags);
-
-	/* the IOMMU API requires us to return how many bytes were unmapped */
-	len = ret ? 0 : len;
 	return len;
 }
 
@@ -555,47 +478,46 @@ static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
 					  dma_addr_t va)
 {
 	struct msm_priv *priv;
-	struct msm_iommu_drvdata *iommu_drvdata;
-	struct msm_iommu_ctx_drvdata *ctx_drvdata;
+	struct msm_iommu_dev *iommu;
+	struct msm_iommu_ctx_dev *master;
 	unsigned int par;
 	unsigned long flags;
-	void __iomem *base;
 	phys_addr_t ret = 0;
-	int ctx;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
 
 	priv = to_msm_priv(domain);
-	if (list_empty(&priv->list_attached))
-		goto fail;
+	iommu = list_first_entry(&priv->list_attached,
+				 struct msm_iommu_dev, dom_node);
 
-	ctx_drvdata = list_entry(priv->list_attached.next,
-				 struct msm_iommu_ctx_drvdata, attached_elm);
-	iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
+	if (list_empty(&iommu->ctx_list))
+		goto fail;
 
-	base = iommu_drvdata->base;
-	ctx = ctx_drvdata->num;
+	master = list_first_entry(&iommu->ctx_list,
+				  struct msm_iommu_ctx_dev, list);
+	if (!master)
+		goto fail;
 
-	ret = __enable_clocks(iommu_drvdata);
+	ret = __enable_clocks(iommu);
 	if (ret)
 		goto fail;
 
 	/* Invalidate context TLB */
-	SET_CTX_TLBIALL(base, ctx, 0);
-	SET_V2PPR(base, ctx, va & V2Pxx_VA);
+	SET_CTX_TLBIALL(iommu->base, master->num, 0);
+	SET_V2PPR(iommu->base, master->num, va & V2Pxx_VA);
 
-	par = GET_PAR(base, ctx);
+	par = GET_PAR(iommu->base, master->num);
 
 	/* We are dealing with a supersection */
-	if (GET_NOFAULT_SS(base, ctx))
+	if (GET_NOFAULT_SS(iommu->base, master->num))
 		ret = (par & 0xFF000000) | (va & 0x00FFFFFF);
 	else	/* Upper 20 bits from PAR, lower 12 from VA */
 		ret = (par & 0xFFFFF000) | (va & 0x00000FFF);
 
-	if (GET_FAULT(base, ctx))
+	if (GET_FAULT(iommu->base, master->num))
 		ret = 0;
 
-	__disable_clocks(iommu_drvdata);
+	__disable_clocks(iommu);
 fail:
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
 	return ret;
@@ -629,49 +551,92 @@ static void print_ctx_regs(void __iomem *base, int ctx)
 	       GET_TTBR0(base, ctx), GET_TTBR1(base, ctx));
 	pr_err("SCTLR  = %08x    ACTLR  = %08x\n",
 	       GET_SCTLR(base, ctx), GET_ACTLR(base, ctx));
-	pr_err("PRRR   = %08x    NMRR   = %08x\n",
-	       GET_PRRR(base, ctx), GET_NMRR(base, ctx));
+}
+
+static void insert_iommu_master(struct device *dev,
+				struct msm_iommu_dev **iommu,
+				struct of_phandle_args *spec)
+{
+	struct msm_iommu_ctx_dev *master = dev->archdata.iommu;
+	int sid;
+
+	if (list_empty(&(*iommu)->ctx_list)) {
+		master = kzalloc(sizeof(*master), GFP_ATOMIC);
+		master->of_node = dev->of_node;
+		list_add(&master->list, &(*iommu)->ctx_list);
+		dev->archdata.iommu = master;
+	}
+
+	for (sid = 0; sid < master->num_mids; sid++)
+		if (master->mids[sid] == spec->args[0]) {
+			dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n",
+				 sid);
+			return;
+		}
+
+	master->mids[master->num_mids++] = spec->args[0];
+}
+
+static int qcom_iommu_of_xlate(struct device *dev,
+			       struct of_phandle_args *spec)
+{
+	struct msm_iommu_dev *iommu;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&msm_iommu_lock, flags);
+	list_for_each_entry(iommu, &qcom_iommu_devices, dev_node)
+		if (iommu->dev->of_node == spec->np)
+			break;
+
+	if (!iommu || iommu->dev->of_node != spec->np) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	insert_iommu_master(dev, &iommu, spec);
+fail:
+	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+
+	return ret;
 }
 
 irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
 {
-	struct msm_iommu_drvdata *drvdata = dev_id;
-	void __iomem *base;
+	struct msm_iommu_dev *iommu = dev_id;
 	unsigned int fsr;
 	int i, ret;
 
 	spin_lock(&msm_iommu_lock);
 
-	if (!drvdata) {
+	if (!iommu) {
 		pr_err("Invalid device ID in context interrupt handler\n");
 		goto fail;
 	}
 
-	base = drvdata->base;
-
 	pr_err("Unexpected IOMMU page fault!\n");
-	pr_err("base = %08x\n", (unsigned int) base);
+	pr_err("base = %08x\n", (unsigned int)iommu->base);
 
-	ret = __enable_clocks(drvdata);
+	ret = __enable_clocks(iommu);
 	if (ret)
 		goto fail;
 
-	for (i = 0; i < drvdata->ncb; i++) {
-		fsr = GET_FSR(base, i);
+	for (i = 0; i < iommu->ncb; i++) {
+		fsr = GET_FSR(iommu->base, i);
 		if (fsr) {
 			pr_err("Fault occurred in context %d.\n", i);
 			pr_err("Interesting registers:\n");
-			print_ctx_regs(base, i);
-			SET_FSR(base, i, 0x4000000F);
+			print_ctx_regs(iommu->base, i);
+			SET_FSR(iommu->base, i, 0x4000000F);
 		}
 	}
-	__disable_clocks(drvdata);
+	__disable_clocks(iommu);
 fail:
 	spin_unlock(&msm_iommu_lock);
 	return 0;
 }
 
-static const struct iommu_ops msm_iommu_ops = {
+static struct iommu_ops msm_iommu_ops = {
 	.capable = msm_iommu_capable,
 	.domain_alloc = msm_iommu_domain_alloc,
 	.domain_free = msm_iommu_domain_free,
@@ -682,54 +647,163 @@ static const struct iommu_ops msm_iommu_ops = {
 	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.pgsize_bitmap = MSM_IOMMU_PGSIZES,
+	.of_xlate = qcom_iommu_of_xlate,
 };
 
-static int __init get_tex_class(int icp, int ocp, int mt, int nos)
+static int msm_iommu_probe(struct platform_device *pdev)
 {
-	int i = 0;
-	unsigned int prrr = 0;
-	unsigned int nmrr = 0;
-	int c_icp, c_ocp, c_mt, c_nos;
-
-	RCP15_PRRR(prrr);
-	RCP15_NMRR(nmrr);
-
-	for (i = 0; i < NUM_TEX_CLASS; i++) {
-		c_nos = PRRR_NOS(prrr, i);
-		c_mt = PRRR_MT(prrr, i);
-		c_icp = NMRR_ICP(nmrr, i);
-		c_ocp = NMRR_OCP(nmrr, i);
-
-		if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
-			return i;
+	struct resource *r;
+	struct msm_iommu_dev *iommu;
+	int ret, par, val;
+
+	iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENODEV;
+
+	iommu->dev = &pdev->dev;
+	INIT_LIST_HEAD(&iommu->ctx_list);
+
+	iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk");
+	if (IS_ERR(iommu->pclk)) {
+		dev_err(iommu->dev, "could not get smmu_pclk\n");
+		return PTR_ERR(iommu->pclk);
+	}
+
+	ret = clk_prepare(iommu->pclk);
+	if (ret) {
+		dev_err(iommu->dev, "could not prepare smmu_pclk\n");
+		return ret;
+	}
+
+	iommu->clk = devm_clk_get(iommu->dev, "iommu_clk");
+	if (IS_ERR(iommu->clk)) {
+		dev_err(iommu->dev, "could not get iommu_clk\n");
+		clk_unprepare(iommu->pclk);
+		return PTR_ERR(iommu->clk);
+	}
+
+	ret = clk_prepare(iommu->clk);
+	if (ret) {
+		dev_err(iommu->dev, "could not prepare iommu_clk\n");
+		clk_unprepare(iommu->pclk);
+		return ret;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iommu->base = devm_ioremap_resource(iommu->dev, r);
+	if (IS_ERR(iommu->base)) {
+		dev_err(iommu->dev, "could not get iommu base\n");
+		ret = PTR_ERR(iommu->base);
+		goto fail;
 	}
 
-	return -ENODEV;
+	iommu->irq = platform_get_irq(pdev, 0);
+	if (iommu->irq < 0) {
+		dev_err(iommu->dev, "could not get iommu irq\n");
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val);
+	if (ret) {
+		dev_err(iommu->dev, "could not get ncb\n");
+		goto fail;
+	}
+	iommu->ncb = val;
+
+	msm_iommu_reset(iommu->base, iommu->ncb);
+	SET_M(iommu->base, 0, 1);
+	SET_PAR(iommu->base, 0, 0);
+	SET_V2PCFG(iommu->base, 0, 1);
+	SET_V2PPR(iommu->base, 0, 0);
+	par = GET_PAR(iommu->base, 0);
+	SET_V2PCFG(iommu->base, 0, 0);
+	SET_M(iommu->base, 0, 0);
+
+	if (!par) {
+		pr_err("Invalid PAR value detected\n");
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL,
+					msm_iommu_fault_handler,
+					IRQF_ONESHOT | IRQF_SHARED,
+					"msm_iommu_secure_irpt_handler",
+					iommu);
+	if (ret) {
+		pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret);
+		goto fail;
+	}
+
+	list_add(&iommu->dev_node, &qcom_iommu_devices);
+	of_iommu_set_ops(pdev->dev.of_node, &msm_iommu_ops);
+
+	pr_info("device mapped at %p, irq %d with %d ctx banks\n",
+		iommu->base, iommu->irq, iommu->ncb);
+
+	return ret;
+fail:
+	clk_unprepare(iommu->clk);
+	clk_unprepare(iommu->pclk);
+	return ret;
+}
+
+static const struct of_device_id msm_iommu_dt_match[] = {
+	{ .compatible = "qcom,apq8064-iommu" },
+	{}
+};
+
+static int msm_iommu_remove(struct platform_device *pdev)
+{
+	struct msm_iommu_dev *iommu = platform_get_drvdata(pdev);
+
+	clk_unprepare(iommu->clk);
+	clk_unprepare(iommu->pclk);
+	return 0;
 }
 
-static void __init setup_iommu_tex_classes(void)
+static struct platform_driver msm_iommu_driver = {
+	.driver = {
+		.name	= "msm_iommu",
+		.of_match_table = msm_iommu_dt_match,
+	},
+	.probe		= msm_iommu_probe,
+	.remove		= msm_iommu_remove,
+};
+
+static int __init msm_iommu_driver_init(void)
 {
-	msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
-			get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
+	int ret;
 
-	msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
-			get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
+	ret = platform_driver_register(&msm_iommu_driver);
+	if (ret != 0)
+		pr_err("Failed to register IOMMU driver\n");
 
-	msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
-			get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
+	return ret;
+}
 
-	msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
-			get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
+static void __exit msm_iommu_driver_exit(void)
+{
+	platform_driver_unregister(&msm_iommu_driver);
 }
 
+subsys_initcall(msm_iommu_driver_init);
+module_exit(msm_iommu_driver_exit);
+
 static int __init msm_iommu_init(void)
 {
-	setup_iommu_tex_classes();
 	bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
 	return 0;
 }
 
-subsys_initcall(msm_iommu_init);
+static int __init msm_iommu_of_setup(struct device_node *np)
+{
+	msm_iommu_init();
+	return 0;
+}
+
+IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", msm_iommu_of_setup);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h
index 5c7c955e6d25..4ca25d50d679 100644
--- a/drivers/iommu/msm_iommu.h
+++ b/drivers/iommu/msm_iommu.h
@@ -42,74 +42,53 @@
  */
 #define MAX_NUM_MIDS	32
 
+/* Maximum number of context banks that can be present in IOMMU */
+#define IOMMU_MAX_CBS	128
+
 /**
  * struct msm_iommu_dev - a single IOMMU hardware instance
- * name		Human-readable name given to this IOMMU HW instance
  * ncb		Number of context banks present on this IOMMU HW instance
+ * dev:		IOMMU device
+ * irq:		Interrupt number
+ * clk:		The bus clock for this IOMMU hardware instance
+ * pclk:	The clock for the IOMMU bus interconnect
+ * dev_node:	list head in qcom_iommu_device_list
+ * dom_node:	list head for domain
+ * ctx_list:	list of 'struct msm_iommu_ctx_dev'
+ * context_map: Bitmap to track allocated context banks
  */
 struct msm_iommu_dev {
-	const char *name;
+	void __iomem *base;
 	int ncb;
+	struct device *dev;
+	int irq;
+	struct clk *clk;
+	struct clk *pclk;
+	struct list_head dev_node;
+	struct list_head dom_node;
+	struct list_head ctx_list;
+	DECLARE_BITMAP(context_map, IOMMU_MAX_CBS);
 };
 
 /**
  * struct msm_iommu_ctx_dev - an IOMMU context bank instance
- * name		Human-readable name given to this context bank
+ * of_node	node ptr of client device
  * num		Index of this context bank within the hardware
  * mids		List of Machine IDs that are to be mapped into this context
  *		bank, terminated by -1. The MID is a set of signals on the
  *		AXI bus that identifies the function associated with a specific
  *		memory request. (See ARM spec).
+ * num_mids	Total number of mids
+ * node		list head in ctx_list
  */
 struct msm_iommu_ctx_dev {
-	const char *name;
+	struct device_node *of_node;
 	int num;
 	int mids[MAX_NUM_MIDS];
+	int num_mids;
+	struct list_head list;
 };
 
-
-/**
- * struct msm_iommu_drvdata - A single IOMMU hardware instance
- * @base:	IOMMU config port base address (VA)
- * @ncb		The number of contexts on this IOMMU
- * @irq:	Interrupt number
- * @clk:	The bus clock for this IOMMU hardware instance
- * @pclk:	The clock for the IOMMU bus interconnect
- *
- * A msm_iommu_drvdata holds the global driver data about a single piece
- * of an IOMMU hardware instance.
- */
-struct msm_iommu_drvdata {
-	void __iomem *base;
-	int irq;
-	int ncb;
-	struct clk *clk;
-	struct clk *pclk;
-};
-
-/**
- * struct msm_iommu_ctx_drvdata - an IOMMU context bank instance
- * @num:		Hardware context number of this context
- * @pdev:		Platform device associated wit this HW instance
- * @attached_elm:	List element for domains to track which devices are
- *			attached to them
- *
- * A msm_iommu_ctx_drvdata holds the driver data for a single context bank
- * within each IOMMU hardware instance
- */
-struct msm_iommu_ctx_drvdata {
-	int num;
-	struct platform_device *pdev;
-	struct list_head attached_elm;
-};
-
-/*
- * Look up an IOMMU context device by its context name. NULL if none found.
- * Useful for testing and drivers that do not yet fully have IOMMU stuff in
- * their platform devices.
- */
-struct device *msm_iommu_get_ctx(const char *ctx_name);
-
 /*
  * Interrupt handler for the IOMMU context fault interrupt. Hooking the
  * interrupt is not supported in the API yet, but this will print an error
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
deleted file mode 100644
index 4b09e815accf..000000000000
--- a/drivers/iommu/msm_iommu_dev.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/iommu.h>
-#include <linux/interrupt.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-#include "msm_iommu_hw-8xxx.h"
-#include "msm_iommu.h"
-
-struct iommu_ctx_iter_data {
-	/* input */
-	const char *name;
-
-	/* output */
-	struct device *dev;
-};
-
-static struct platform_device *msm_iommu_root_dev;
-
-static int each_iommu_ctx(struct device *dev, void *data)
-{
-	struct iommu_ctx_iter_data *res = data;
-	struct msm_iommu_ctx_dev *c = dev->platform_data;
-
-	if (!res || !c || !c->name || !res->name)
-		return -EINVAL;
-
-	if (!strcmp(res->name, c->name)) {
-		res->dev = dev;
-		return 1;
-	}
-	return 0;
-}
-
-static int each_iommu(struct device *dev, void *data)
-{
-	return device_for_each_child(dev, data, each_iommu_ctx);
-}
-
-struct device *msm_iommu_get_ctx(const char *ctx_name)
-{
-	struct iommu_ctx_iter_data r;
-	int found;
-
-	if (!msm_iommu_root_dev) {
-		pr_err("No root IOMMU device.\n");
-		goto fail;
-	}
-
-	r.name = ctx_name;
-	found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu);
-
-	if (!found) {
-		pr_err("Could not find context <%s>\n", ctx_name);
-		goto fail;
-	}
-
-	return r.dev;
-fail:
-	return NULL;
-}
-EXPORT_SYMBOL(msm_iommu_get_ctx);
-
-static void msm_iommu_reset(void __iomem *base, int ncb)
-{
-	int ctx;
-
-	SET_RPUE(base, 0);
-	SET_RPUEIE(base, 0);
-	SET_ESRRESTORE(base, 0);
-	SET_TBE(base, 0);
-	SET_CR(base, 0);
-	SET_SPDMBE(base, 0);
-	SET_TESTBUSCR(base, 0);
-	SET_TLBRSW(base, 0);
-	SET_GLOBAL_TLBIALL(base, 0);
-	SET_RPU_ACR(base, 0);
-	SET_TLBLKCRWE(base, 1);
-
-	for (ctx = 0; ctx < ncb; ctx++) {
-		SET_BPRCOSH(base, ctx, 0);
-		SET_BPRCISH(base, ctx, 0);
-		SET_BPRCNSH(base, ctx, 0);
-		SET_BPSHCFG(base, ctx, 0);
-		SET_BPMTCFG(base, ctx, 0);
-		SET_ACTLR(base, ctx, 0);
-		SET_SCTLR(base, ctx, 0);
-		SET_FSRRESTORE(base, ctx, 0);
-		SET_TTBR0(base, ctx, 0);
-		SET_TTBR1(base, ctx, 0);
-		SET_TTBCR(base, ctx, 0);
-		SET_BFBCR(base, ctx, 0);
-		SET_PAR(base, ctx, 0);
-		SET_FAR(base, ctx, 0);
-		SET_CTX_TLBIALL(base, ctx, 0);
-		SET_TLBFLPTER(base, ctx, 0);
-		SET_TLBSLPTER(base, ctx, 0);
-		SET_TLBLKCR(base, ctx, 0);
-		SET_PRRR(base, ctx, 0);
-		SET_NMRR(base, ctx, 0);
-		SET_CONTEXTIDR(base, ctx, 0);
-	}
-}
-
-static int msm_iommu_probe(struct platform_device *pdev)
-{
-	struct resource *r;
-	struct clk *iommu_clk;
-	struct clk *iommu_pclk;
-	struct msm_iommu_drvdata *drvdata;
-	struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev);
-	void __iomem *regs_base;
-	int ret, irq, par;
-
-	if (pdev->id == -1) {
-		msm_iommu_root_dev = pdev;
-		return 0;
-	}
-
-	drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL);
-
-	if (!drvdata) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	if (!iommu_dev) {
-		ret = -ENODEV;
-		goto fail;
-	}
-
-	iommu_pclk = clk_get(NULL, "smmu_pclk");
-	if (IS_ERR(iommu_pclk)) {
-		ret = -ENODEV;
-		goto fail;
-	}
-
-	ret = clk_prepare_enable(iommu_pclk);
-	if (ret)
-		goto fail_enable;
-
-	iommu_clk = clk_get(&pdev->dev, "iommu_clk");
-
-	if (!IS_ERR(iommu_clk))	{
-		if (clk_get_rate(iommu_clk) == 0)
-			clk_set_rate(iommu_clk, 1);
-
-		ret = clk_prepare_enable(iommu_clk);
-		if (ret) {
-			clk_put(iommu_clk);
-			goto fail_pclk;
-		}
-	} else
-		iommu_clk = NULL;
-
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase");
-	regs_base = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(regs_base)) {
-		ret = PTR_ERR(regs_base);
-		goto fail_clk;
-	}
-
-	irq = platform_get_irq_byname(pdev, "secure_irq");
-	if (irq < 0) {
-		ret = -ENODEV;
-		goto fail_clk;
-	}
-
-	msm_iommu_reset(regs_base, iommu_dev->ncb);
-
-	SET_M(regs_base, 0, 1);
-	SET_PAR(regs_base, 0, 0);
-	SET_V2PCFG(regs_base, 0, 1);
-	SET_V2PPR(regs_base, 0, 0);
-	par = GET_PAR(regs_base, 0);
-	SET_V2PCFG(regs_base, 0, 0);
-	SET_M(regs_base, 0, 0);
-
-	if (!par) {
-		pr_err("%s: Invalid PAR value detected\n", iommu_dev->name);
-		ret = -ENODEV;
-		goto fail_clk;
-	}
-
-	ret = request_irq(irq, msm_iommu_fault_handler, 0,
-			"msm_iommu_secure_irpt_handler", drvdata);
-	if (ret) {
-		pr_err("Request IRQ %d failed with ret=%d\n", irq, ret);
-		goto fail_clk;
-	}
-
-
-	drvdata->pclk = iommu_pclk;
-	drvdata->clk = iommu_clk;
-	drvdata->base = regs_base;
-	drvdata->irq = irq;
-	drvdata->ncb = iommu_dev->ncb;
-
-	pr_info("device %s mapped at %p, irq %d with %d ctx banks\n",
-		iommu_dev->name, regs_base, irq, iommu_dev->ncb);
-
-	platform_set_drvdata(pdev, drvdata);
-
-	clk_disable(iommu_clk);
-
-	clk_disable(iommu_pclk);
-
-	return 0;
-fail_clk:
-	if (iommu_clk) {
-		clk_disable(iommu_clk);
-		clk_put(iommu_clk);
-	}
-fail_pclk:
-	clk_disable_unprepare(iommu_pclk);
-fail_enable:
-	clk_put(iommu_pclk);
-fail:
-	kfree(drvdata);
-	return ret;
-}
-
-static int msm_iommu_remove(struct platform_device *pdev)
-{
-	struct msm_iommu_drvdata *drv = NULL;
-
-	drv = platform_get_drvdata(pdev);
-	if (drv) {
-		if (drv->clk) {
-			clk_unprepare(drv->clk);
-			clk_put(drv->clk);
-		}
-		clk_unprepare(drv->pclk);
-		clk_put(drv->pclk);
-		memset(drv, 0, sizeof(*drv));
-		kfree(drv);
-	}
-	return 0;
-}
-
-static int msm_iommu_ctx_probe(struct platform_device *pdev)
-{
-	struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev);
-	struct msm_iommu_drvdata *drvdata;
-	struct msm_iommu_ctx_drvdata *ctx_drvdata;
-	int i, ret;
-
-	if (!c || !pdev->dev.parent)
-		return -EINVAL;
-
-	drvdata = dev_get_drvdata(pdev->dev.parent);
-	if (!drvdata)
-		return -ENODEV;
-
-	ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL);
-	if (!ctx_drvdata)
-		return -ENOMEM;
-
-	ctx_drvdata->num = c->num;
-	ctx_drvdata->pdev = pdev;
-
-	INIT_LIST_HEAD(&ctx_drvdata->attached_elm);
-	platform_set_drvdata(pdev, ctx_drvdata);
-
-	ret = clk_prepare_enable(drvdata->pclk);
-	if (ret)
-		goto fail;
-
-	if (drvdata->clk) {
-		ret = clk_prepare_enable(drvdata->clk);
-		if (ret) {
-			clk_disable_unprepare(drvdata->pclk);
-			goto fail;
-		}
-	}
-
-	/* Program the M2V tables for this context */
-	for (i = 0; i < MAX_NUM_MIDS; i++) {
-		int mid = c->mids[i];
-		if (mid == -1)
-			break;
-
-		SET_M2VCBR_N(drvdata->base, mid, 0);
-		SET_CBACR_N(drvdata->base, c->num, 0);
-
-		/* Set VMID = 0 */
-		SET_VMID(drvdata->base, mid, 0);
-
-		/* Set the context number for that MID to this context */
-		SET_CBNDX(drvdata->base, mid, c->num);
-
-		/* Set MID associated with this context bank to 0*/
-		SET_CBVMID(drvdata->base, c->num, 0);
-
-		/* Set the ASID for TLB tagging for this context */
-		SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num);
-
-		/* Set security bit override to be Non-secure */
-		SET_NSCFG(drvdata->base, mid, 3);
-	}
-
-	clk_disable(drvdata->clk);
-	clk_disable(drvdata->pclk);
-
-	dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
-	return 0;
-fail:
-	kfree(ctx_drvdata);
-	return ret;
-}
-
-static int msm_iommu_ctx_remove(struct platform_device *pdev)
-{
-	struct msm_iommu_ctx_drvdata *drv = NULL;
-	drv = platform_get_drvdata(pdev);
-	if (drv) {
-		memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata));
-		kfree(drv);
-	}
-	return 0;
-}
-
-static struct platform_driver msm_iommu_driver = {
-	.driver = {
-		.name	= "msm_iommu",
-	},
-	.probe		= msm_iommu_probe,
-	.remove		= msm_iommu_remove,
-};
-
-static struct platform_driver msm_iommu_ctx_driver = {
-	.driver = {
-		.name	= "msm_iommu_ctx",
-	},
-	.probe		= msm_iommu_ctx_probe,
-	.remove		= msm_iommu_ctx_remove,
-};
-
-static struct platform_driver * const drivers[] = {
-	&msm_iommu_driver,
-	&msm_iommu_ctx_driver,
-};
-
-static int __init msm_iommu_driver_init(void)
-{
-	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
-}
-
-static void __exit msm_iommu_driver_exit(void)
-{
-	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
-}
-
-subsys_initcall(msm_iommu_driver_init);
-module_exit(msm_iommu_driver_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c3043d8754e3..b12c12d74c33 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -34,7 +34,7 @@
 #include <dt-bindings/memory/mt8173-larb-port.h>
 #include <soc/mediatek/smi.h>
 
-#include "io-pgtable.h"
+#include "mtk_iommu.h"
 
 #define REG_MMU_PT_BASE_ADDR			0x000
 
@@ -93,20 +93,6 @@
 
 #define MTK_PROTECT_PA_ALIGN			128
 
-struct mtk_iommu_suspend_reg {
-	u32				standard_axi_mode;
-	u32				dcm_dis;
-	u32				ctrl_reg;
-	u32				int_control0;
-	u32				int_main_control;
-};
-
-struct mtk_iommu_client_priv {
-	struct list_head		client;
-	unsigned int			mtk_m4u_id;
-	struct device			*m4udev;
-};
-
 struct mtk_iommu_domain {
 	spinlock_t			pgtlock; /* lock for page table */
 
@@ -116,19 +102,6 @@ struct mtk_iommu_domain {
 	struct iommu_domain		domain;
 };
 
-struct mtk_iommu_data {
-	void __iomem			*base;
-	int				irq;
-	struct device			*dev;
-	struct clk			*bclk;
-	phys_addr_t			protect_base; /* protect memory base */
-	struct mtk_iommu_suspend_reg	reg;
-	struct mtk_iommu_domain		*m4u_dom;
-	struct iommu_group		*m4u_group;
-	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
-	bool                            enable_4GB;
-};
-
 static struct iommu_ops mtk_iommu_ops;
 
 static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
@@ -455,7 +428,6 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
 	if (!dev->archdata.iommu) {
 		/* Get the m4u device */
 		m4updev = of_find_device_by_node(args->np);
-		of_node_put(args->np);
 		if (WARN_ON(!m4updev))
 			return -EINVAL;
 
@@ -552,25 +524,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 	return 0;
 }
 
-static int compare_of(struct device *dev, void *data)
-{
-	return dev->of_node == data;
-}
-
-static int mtk_iommu_bind(struct device *dev)
-{
-	struct mtk_iommu_data *data = dev_get_drvdata(dev);
-
-	return component_bind_all(dev, &data->smi_imu);
-}
-
-static void mtk_iommu_unbind(struct device *dev)
-{
-	struct mtk_iommu_data *data = dev_get_drvdata(dev);
-
-	component_unbind_all(dev, &data->smi_imu);
-}
-
 static const struct component_master_ops mtk_iommu_com_ops = {
 	.bind		= mtk_iommu_bind,
 	.unbind		= mtk_iommu_unbind,
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
new file mode 100644
index 000000000000..9ed0a8462ccf
--- /dev/null
+++ b/drivers/iommu/mtk_iommu.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2015-2016 MediaTek Inc.
+ * Author: Honghui Zhang <honghui.zhang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _MTK_IOMMU_H_
+#define _MTK_IOMMU_H_
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <soc/mediatek/smi.h>
+
+#include "io-pgtable.h"
+
+struct mtk_iommu_suspend_reg {
+	u32				standard_axi_mode;
+	u32				dcm_dis;
+	u32				ctrl_reg;
+	u32				int_control0;
+	u32				int_main_control;
+};
+
+struct mtk_iommu_client_priv {
+	struct list_head		client;
+	unsigned int			mtk_m4u_id;
+	struct device			*m4udev;
+};
+
+struct mtk_iommu_domain;
+
+struct mtk_iommu_data {
+	void __iomem			*base;
+	int				irq;
+	struct device			*dev;
+	struct clk			*bclk;
+	phys_addr_t			protect_base; /* protect memory base */
+	struct mtk_iommu_suspend_reg	reg;
+	struct mtk_iommu_domain		*m4u_dom;
+	struct iommu_group		*m4u_group;
+	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
+	bool                            enable_4GB;
+};
+
+static int compare_of(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static int mtk_iommu_bind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+
+	return component_bind_all(dev, &data->smi_imu);
+}
+
+static void mtk_iommu_unbind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+
+	component_unbind_all(dev, &data->smi_imu);
+}
+
+#endif
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
new file mode 100644
index 000000000000..b8aeb0768483
--- /dev/null
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2015-2016 MediaTek Inc.
+ * Author: Honghui Zhang <honghui.zhang@mediatek.com>
+ *
+ * Based on driver/iommu/mtk_iommu.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/bootmem.h>
+#include <linux/bug.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/kmemleak.h>
+#include <linux/list.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/barrier.h>
+#include <asm/dma-iommu.h>
+#include <linux/module.h>
+#include <dt-bindings/memory/mt2701-larb-port.h>
+#include <soc/mediatek/smi.h>
+#include "mtk_iommu.h"
+
+#define REG_MMU_PT_BASE_ADDR			0x000
+
+#define F_ALL_INVLD				0x2
+#define F_MMU_INV_RANGE				0x1
+#define F_INVLD_EN0				BIT(0)
+#define F_INVLD_EN1				BIT(1)
+
+#define F_MMU_FAULT_VA_MSK			0xfffff000
+#define MTK_PROTECT_PA_ALIGN			128
+
+#define REG_MMU_CTRL_REG			0x210
+#define F_MMU_CTRL_COHERENT_EN			BIT(8)
+#define REG_MMU_IVRP_PADDR			0x214
+#define REG_MMU_INT_CONTROL			0x220
+#define F_INT_TRANSLATION_FAULT			BIT(0)
+#define F_INT_MAIN_MULTI_HIT_FAULT		BIT(1)
+#define F_INT_INVALID_PA_FAULT			BIT(2)
+#define F_INT_ENTRY_REPLACEMENT_FAULT		BIT(3)
+#define F_INT_TABLE_WALK_FAULT			BIT(4)
+#define F_INT_TLB_MISS_FAULT			BIT(5)
+#define F_INT_PFH_DMA_FIFO_OVERFLOW		BIT(6)
+#define F_INT_MISS_DMA_FIFO_OVERFLOW		BIT(7)
+
+#define F_MMU_TF_PROTECT_SEL(prot)		(((prot) & 0x3) << 5)
+#define F_INT_CLR_BIT				BIT(12)
+
+#define REG_MMU_FAULT_ST			0x224
+#define REG_MMU_FAULT_VA			0x228
+#define REG_MMU_INVLD_PA			0x22C
+#define REG_MMU_INT_ID				0x388
+#define REG_MMU_INVALIDATE			0x5c0
+#define REG_MMU_INVLD_START_A			0x5c4
+#define REG_MMU_INVLD_END_A			0x5c8
+
+#define REG_MMU_INV_SEL				0x5d8
+#define REG_MMU_STANDARD_AXI_MODE		0x5e8
+
+#define REG_MMU_DCM				0x5f0
+#define F_MMU_DCM_ON				BIT(1)
+#define REG_MMU_CPE_DONE			0x60c
+#define F_DESC_VALID				0x2
+#define F_DESC_NONSEC				BIT(3)
+#define MT2701_M4U_TF_LARB(TF)			(6 - (((TF) >> 13) & 0x7))
+#define MT2701_M4U_TF_PORT(TF)			(((TF) >> 8) & 0xF)
+/* MTK generation one iommu HW only support 4K size mapping */
+#define MT2701_IOMMU_PAGE_SHIFT			12
+#define MT2701_IOMMU_PAGE_SIZE			(1UL << MT2701_IOMMU_PAGE_SHIFT)
+
+/*
+ * MTK m4u support 4GB iova address space, and only support 4K page
+ * mapping. So the pagetable size should be exactly as 4M.
+ */
+#define M2701_IOMMU_PGT_SIZE			SZ_4M
+
+struct mtk_iommu_domain {
+	spinlock_t			pgtlock; /* lock for page table */
+	struct iommu_domain		domain;
+	u32				*pgt_va;
+	dma_addr_t			pgt_pa;
+	struct mtk_iommu_data		*data;
+};
+
+static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct mtk_iommu_domain, domain);
+}
+
+static const int mt2701_m4u_in_larb[] = {
+	LARB0_PORT_OFFSET, LARB1_PORT_OFFSET,
+	LARB2_PORT_OFFSET, LARB3_PORT_OFFSET
+};
+
+static inline int mt2701_m4u_to_larb(int id)
+{
+	int i;
+
+	for (i = ARRAY_SIZE(mt2701_m4u_in_larb) - 1; i >= 0; i--)
+		if ((id) >= mt2701_m4u_in_larb[i])
+			return i;
+
+	return 0;
+}
+
+static inline int mt2701_m4u_to_port(int id)
+{
+	int larb = mt2701_m4u_to_larb(id);
+
+	return id - mt2701_m4u_in_larb[larb];
+}
+
+static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data)
+{
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
+			data->base + REG_MMU_INV_SEL);
+	writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
+	wmb(); /* Make sure the tlb flush all done */
+}
+
+static void mtk_iommu_tlb_flush_range(struct mtk_iommu_data *data,
+				unsigned long iova, size_t size)
+{
+	int ret;
+	u32 tmp;
+
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
+		data->base + REG_MMU_INV_SEL);
+	writel_relaxed(iova & F_MMU_FAULT_VA_MSK,
+		data->base + REG_MMU_INVLD_START_A);
+	writel_relaxed((iova + size - 1) & F_MMU_FAULT_VA_MSK,
+		data->base + REG_MMU_INVLD_END_A);
+	writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
+
+	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
+				tmp, tmp != 0, 10, 100000);
+	if (ret) {
+		dev_warn(data->dev,
+			 "Partial TLB flush timed out, falling back to full flush\n");
+		mtk_iommu_tlb_flush_all(data);
+	}
+	/* Clear the CPE status */
+	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
+}
+
+static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
+{
+	struct mtk_iommu_data *data = dev_id;
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+	u32 int_state, regval, fault_iova, fault_pa;
+	unsigned int fault_larb, fault_port;
+
+	/* Read error information from registers */
+	int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST);
+	fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
+
+	fault_iova &= F_MMU_FAULT_VA_MSK;
+	fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
+	regval = readl_relaxed(data->base + REG_MMU_INT_ID);
+	fault_larb = MT2701_M4U_TF_LARB(regval);
+	fault_port = MT2701_M4U_TF_PORT(regval);
+
+	/*
+	 * MTK v1 iommu HW could not determine whether the fault is read or
+	 * write fault, report as read fault.
+	 */
+	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
+			IOMMU_FAULT_READ))
+		dev_err_ratelimited(data->dev,
+			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d\n",
+			int_state, fault_iova, fault_pa,
+			fault_larb, fault_port);
+
+	/* Interrupt clear */
+	regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL);
+	regval |= F_INT_CLR_BIT;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL);
+
+	mtk_iommu_tlb_flush_all(data);
+
+	return IRQ_HANDLED;
+}
+
+static void mtk_iommu_config(struct mtk_iommu_data *data,
+			     struct device *dev, bool enable)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+	struct mtk_smi_larb_iommu    *larb_mmu;
+	unsigned int                 larbid, portid;
+
+	head = dev->archdata.iommu;
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		larbid = mt2701_m4u_to_larb(cur->mtk_m4u_id);
+		portid = mt2701_m4u_to_port(cur->mtk_m4u_id);
+		larb_mmu = &data->smi_imu.larb_imu[larbid];
+
+		dev_dbg(dev, "%s iommu port: %d\n",
+			enable ? "enable" : "disable", portid);
+
+		if (enable)
+			larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
+		else
+			larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid);
+	}
+}
+
+static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data)
+{
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+
+	spin_lock_init(&dom->pgtlock);
+
+	dom->pgt_va = dma_zalloc_coherent(data->dev,
+				M2701_IOMMU_PGT_SIZE,
+				&dom->pgt_pa, GFP_KERNEL);
+	if (!dom->pgt_va)
+		return -ENOMEM;
+
+	writel(dom->pgt_pa, data->base + REG_MMU_PT_BASE_ADDR);
+
+	dom->data = data;
+
+	return 0;
+}
+
+static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
+{
+	struct mtk_iommu_domain *dom;
+
+	if (type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
+	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+	if (!dom)
+		return NULL;
+
+	return &dom->domain;
+}
+
+static void mtk_iommu_domain_free(struct iommu_domain *domain)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	struct mtk_iommu_data *data = dom->data;
+
+	dma_free_coherent(data->dev, M2701_IOMMU_PGT_SIZE,
+			dom->pgt_va, dom->pgt_pa);
+	kfree(to_mtk_domain(domain));
+}
+
+static int mtk_iommu_attach_device(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+	int ret;
+
+	if (!priv)
+		return -ENODEV;
+
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data->m4u_dom) {
+		data->m4u_dom = dom;
+		ret = mtk_iommu_domain_finalise(data);
+		if (ret) {
+			data->m4u_dom = NULL;
+			return ret;
+		}
+	}
+
+	mtk_iommu_config(data, dev, true);
+	return 0;
+}
+
+static void mtk_iommu_detach_device(struct iommu_domain *domain,
+				    struct device *dev)
+{
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+
+	if (!priv)
+		return;
+
+	data = dev_get_drvdata(priv->m4udev);
+	mtk_iommu_config(data, dev, false);
+}
+
+static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			 phys_addr_t paddr, size_t size, int prot)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
+	unsigned long flags;
+	unsigned int i;
+	u32 *pgt_base_iova = dom->pgt_va + (iova  >> MT2701_IOMMU_PAGE_SHIFT);
+	u32 pabase = (u32)paddr;
+	int map_size = 0;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	for (i = 0; i < page_num; i++) {
+		if (pgt_base_iova[i]) {
+			memset(pgt_base_iova, 0, i * sizeof(u32));
+			break;
+		}
+		pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC;
+		pabase += MT2701_IOMMU_PAGE_SIZE;
+		map_size += MT2701_IOMMU_PAGE_SIZE;
+	}
+
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	mtk_iommu_tlb_flush_range(dom->data, iova, size);
+
+	return map_size == size ? 0 : -EEXIST;
+}
+
+static size_t mtk_iommu_unmap(struct iommu_domain *domain,
+			      unsigned long iova, size_t size)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	u32 *pgt_base_iova = dom->pgt_va + (iova  >> MT2701_IOMMU_PAGE_SHIFT);
+	unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	memset(pgt_base_iova, 0, page_num * sizeof(u32));
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	mtk_iommu_tlb_flush_range(dom->data, iova, size);
+
+	return size;
+}
+
+static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
+					  dma_addr_t iova)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	phys_addr_t pa;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	pa = *(dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT));
+	pa = pa & (~(MT2701_IOMMU_PAGE_SIZE - 1));
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return pa;
+}
+
+/*
+ * MTK generation one iommu HW only support one iommu domain, and all the client
+ * sharing the same iova address space.
+ */
+static int mtk_iommu_create_mapping(struct device *dev,
+				    struct of_phandle_args *args)
+{
+	struct mtk_iommu_client_priv *head, *priv, *next;
+	struct platform_device *m4updev;
+	struct dma_iommu_mapping *mtk_mapping;
+	struct device *m4udev;
+	int ret;
+
+	if (args->args_count != 1) {
+		dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n",
+			args->args_count);
+		return -EINVAL;
+	}
+
+	if (!dev->archdata.iommu) {
+		/* Get the m4u device */
+		m4updev = of_find_device_by_node(args->np);
+		if (WARN_ON(!m4updev))
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (!head)
+			return -ENOMEM;
+
+		dev->archdata.iommu = head;
+		INIT_LIST_HEAD(&head->client);
+		head->m4udev = &m4updev->dev;
+	} else {
+		head = dev->archdata.iommu;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto err_free_mem;
+	}
+	priv->mtk_m4u_id = args->args[0];
+	list_add_tail(&priv->client, &head->client);
+
+	m4udev = head->m4udev;
+	mtk_mapping = m4udev->archdata.iommu;
+	if (!mtk_mapping) {
+		/* MTK iommu support 4GB iova address space. */
+		mtk_mapping = arm_iommu_create_mapping(&platform_bus_type,
+						0, 1ULL << 32);
+		if (IS_ERR(mtk_mapping)) {
+			ret = PTR_ERR(mtk_mapping);
+			goto err_free_mem;
+		}
+		m4udev->archdata.iommu = mtk_mapping;
+	}
+
+	ret = arm_iommu_attach_device(dev, mtk_mapping);
+	if (ret)
+		goto err_release_mapping;
+
+	return 0;
+
+err_release_mapping:
+	arm_iommu_release_mapping(mtk_mapping);
+	m4udev->archdata.iommu = NULL;
+err_free_mem:
+	list_for_each_entry_safe(priv, next, &head->client, client)
+		kfree(priv);
+	kfree(head);
+	dev->archdata.iommu = NULL;
+	return ret;
+}
+
+static int mtk_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+	struct of_phandle_args iommu_spec;
+	struct of_phandle_iterator it;
+	int err;
+
+	of_for_each_phandle(&it, err, dev->of_node, "iommus",
+			"#iommu-cells", 0) {
+		int count = of_phandle_iterator_args(&it, iommu_spec.args,
+					MAX_PHANDLE_ARGS);
+		iommu_spec.np = of_node_get(it.node);
+		iommu_spec.args_count = count;
+
+		mtk_iommu_create_mapping(dev, &iommu_spec);
+		of_node_put(iommu_spec.np);
+	}
+
+	if (!dev->archdata.iommu) /* Not a iommu client device */
+		return -ENODEV;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+	return 0;
+}
+
+static void mtk_iommu_remove_device(struct device *dev)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+
+	head = dev->archdata.iommu;
+	if (!head)
+		return;
+
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		list_del(&cur->client);
+		kfree(cur);
+	}
+	kfree(head);
+	dev->archdata.iommu = NULL;
+
+	iommu_group_remove_device(dev);
+}
+
+static struct iommu_group *mtk_iommu_device_group(struct device *dev)
+{
+	struct mtk_iommu_data *data;
+	struct mtk_iommu_client_priv *priv;
+
+	priv = dev->archdata.iommu;
+	if (!priv)
+		return ERR_PTR(-ENODEV);
+
+	/* All the client devices are in the same m4u iommu-group */
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data->m4u_group) {
+		data->m4u_group = iommu_group_alloc();
+		if (IS_ERR(data->m4u_group))
+			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
+	}
+	return data->m4u_group;
+}
+
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
+{
+	u32 regval;
+	int ret;
+
+	ret = clk_prepare_enable(data->bclk);
+	if (ret) {
+		dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret);
+		return ret;
+	}
+
+	regval = F_MMU_CTRL_COHERENT_EN | F_MMU_TF_PROTECT_SEL(2);
+	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
+
+	regval = F_INT_TRANSLATION_FAULT |
+		F_INT_MAIN_MULTI_HIT_FAULT |
+		F_INT_INVALID_PA_FAULT |
+		F_INT_ENTRY_REPLACEMENT_FAULT |
+		F_INT_TABLE_WALK_FAULT |
+		F_INT_TLB_MISS_FAULT |
+		F_INT_PFH_DMA_FIFO_OVERFLOW |
+		F_INT_MISS_DMA_FIFO_OVERFLOW;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL);
+
+	/* protect memory,hw will write here while translation fault */
+	writel_relaxed(data->protect_base,
+			data->base + REG_MMU_IVRP_PADDR);
+
+	writel_relaxed(F_MMU_DCM_ON, data->base + REG_MMU_DCM);
+
+	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
+			     dev_name(data->dev), (void *)data)) {
+		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
+		clk_disable_unprepare(data->bclk);
+		dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static struct iommu_ops mtk_iommu_ops = {
+	.domain_alloc	= mtk_iommu_domain_alloc,
+	.domain_free	= mtk_iommu_domain_free,
+	.attach_dev	= mtk_iommu_attach_device,
+	.detach_dev	= mtk_iommu_detach_device,
+	.map		= mtk_iommu_map,
+	.unmap		= mtk_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
+	.iova_to_phys	= mtk_iommu_iova_to_phys,
+	.add_device	= mtk_iommu_add_device,
+	.remove_device	= mtk_iommu_remove_device,
+	.device_group	= mtk_iommu_device_group,
+	.pgsize_bitmap	= ~0UL << MT2701_IOMMU_PAGE_SHIFT,
+};
+
+static const struct of_device_id mtk_iommu_of_ids[] = {
+	{ .compatible = "mediatek,mt2701-m4u", },
+	{}
+};
+
+static const struct component_master_ops mtk_iommu_com_ops = {
+	.bind		= mtk_iommu_bind,
+	.unbind		= mtk_iommu_unbind,
+};
+
+static int mtk_iommu_probe(struct platform_device *pdev)
+{
+	struct mtk_iommu_data		*data;
+	struct device			*dev = &pdev->dev;
+	struct resource			*res;
+	struct component_match		*match = NULL;
+	struct of_phandle_args		larb_spec;
+	struct of_phandle_iterator	it;
+	void				*protect;
+	int				larb_nr, ret, err;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->dev = dev;
+
+	/* Protect memory. HW will access here while translation fault.*/
+	protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2,
+			GFP_KERNEL | GFP_DMA);
+	if (!protect)
+		return -ENOMEM;
+	data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(data->base))
+		return PTR_ERR(data->base);
+
+	data->irq = platform_get_irq(pdev, 0);
+	if (data->irq < 0)
+		return data->irq;
+
+	data->bclk = devm_clk_get(dev, "bclk");
+	if (IS_ERR(data->bclk))
+		return PTR_ERR(data->bclk);
+
+	larb_nr = 0;
+	of_for_each_phandle(&it, err, dev->of_node,
+			"mediatek,larbs", NULL, 0) {
+		struct platform_device *plarbdev;
+		int count = of_phandle_iterator_args(&it, larb_spec.args,
+					MAX_PHANDLE_ARGS);
+
+		if (count)
+			continue;
+
+		larb_spec.np = of_node_get(it.node);
+		if (!of_device_is_available(larb_spec.np))
+			continue;
+
+		plarbdev = of_find_device_by_node(larb_spec.np);
+		of_node_put(larb_spec.np);
+		if (!plarbdev) {
+			plarbdev = of_platform_device_create(
+						larb_spec.np, NULL,
+						platform_bus_type.dev_root);
+			if (!plarbdev)
+				return -EPROBE_DEFER;
+		}
+
+		data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev;
+		component_match_add(dev, &match, compare_of, larb_spec.np);
+		larb_nr++;
+	}
+
+	data->smi_imu.larb_nr = larb_nr;
+
+	platform_set_drvdata(pdev, data);
+
+	ret = mtk_iommu_hw_init(data);
+	if (ret)
+		return ret;
+
+	if (!iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type,  &mtk_iommu_ops);
+
+	return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+}
+
+static int mtk_iommu_remove(struct platform_device *pdev)
+{
+	struct mtk_iommu_data *data = platform_get_drvdata(pdev);
+
+	if (iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, NULL);
+
+	clk_disable_unprepare(data->bclk);
+	devm_free_irq(&pdev->dev, data->irq, data);
+	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
+	return 0;
+}
+
+static int __maybe_unused mtk_iommu_suspend(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	reg->standard_axi_mode = readl_relaxed(base +
+					       REG_MMU_STANDARD_AXI_MODE);
+	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM);
+	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
+	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL);
+	return 0;
+}
+
+static int __maybe_unused mtk_iommu_resume(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	writel_relaxed(data->m4u_dom->pgt_pa, base + REG_MMU_PT_BASE_ADDR);
+	writel_relaxed(reg->standard_axi_mode,
+		       base + REG_MMU_STANDARD_AXI_MODE);
+	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM);
+	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
+	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL);
+	writel_relaxed(data->protect_base, base + REG_MMU_IVRP_PADDR);
+	return 0;
+}
+
+static const struct dev_pm_ops mtk_iommu_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
+};
+
+static struct platform_driver mtk_iommu_driver = {
+	.probe	= mtk_iommu_probe,
+	.remove	= mtk_iommu_remove,
+	.driver	= {
+		.name = "mtk-iommu",
+		.of_match_table = mtk_iommu_of_ids,
+		.pm = &mtk_iommu_pm_ops,
+	}
+};
+
+static int __init m4u_init(void)
+{
+	return platform_driver_register(&mtk_iommu_driver);
+}
+
+static void __exit m4u_exit(void)
+{
+	return platform_driver_unregister(&mtk_iommu_driver);
+}
+
+subsys_initcall(m4u_init);
+module_exit(m4u_exit);
+
+MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations");
+MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 25b4627cb57f..9afcbf79f0b0 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -4,11 +4,10 @@
  * published by the Free Software Foundation.
  */
 
-#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/dma-iommu.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -77,7 +76,9 @@
 
 struct rk_iommu_domain {
 	struct list_head iommus;
+	struct platform_device *pdev;
 	u32 *dt; /* page directory table */
+	dma_addr_t dt_dma;
 	spinlock_t iommus_lock; /* lock for iommus list */
 	spinlock_t dt_lock; /* lock for modifying page directory table */
 
@@ -93,14 +94,12 @@ struct rk_iommu {
 	struct iommu_domain *domain; /* domain to which iommu is attached */
 };
 
-static inline void rk_table_flush(u32 *va, unsigned int count)
+static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
+				  unsigned int count)
 {
-	phys_addr_t pa_start = virt_to_phys(va);
-	phys_addr_t pa_end = virt_to_phys(va + count);
-	size_t size = pa_end - pa_start;
+	size_t size = count * sizeof(u32); /* count of u32 entry */
 
-	__cpuc_flush_dcache_area(va, size);
-	outer_flush_range(pa_start, pa_end);
+	dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE);
 }
 
 static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -183,10 +182,9 @@ static inline bool rk_dte_is_pt_valid(u32 dte)
 	return dte & RK_DTE_PT_VALID;
 }
 
-static u32 rk_mk_dte(u32 *pt)
+static inline u32 rk_mk_dte(dma_addr_t pt_dma)
 {
-	phys_addr_t pt_phys = virt_to_phys(pt);
-	return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
+	return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
 }
 
 /*
@@ -603,13 +601,16 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
 static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 				  dma_addr_t iova)
 {
+	struct device *dev = &rk_domain->pdev->dev;
 	u32 *page_table, *dte_addr;
-	u32 dte;
+	u32 dte_index, dte;
 	phys_addr_t pt_phys;
+	dma_addr_t pt_dma;
 
 	assert_spin_locked(&rk_domain->dt_lock);
 
-	dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)];
+	dte_index = rk_iova_dte_index(iova);
+	dte_addr = &rk_domain->dt[dte_index];
 	dte = *dte_addr;
 	if (rk_dte_is_pt_valid(dte))
 		goto done;
@@ -618,19 +619,27 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 	if (!page_table)
 		return ERR_PTR(-ENOMEM);
 
-	dte = rk_mk_dte(page_table);
-	*dte_addr = dte;
+	pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pt_dma)) {
+		dev_err(dev, "DMA mapping error while allocating page table\n");
+		free_page((unsigned long)page_table);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	rk_table_flush(page_table, NUM_PT_ENTRIES);
-	rk_table_flush(dte_addr, 1);
+	dte = rk_mk_dte(pt_dma);
+	*dte_addr = dte;
 
+	rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES);
+	rk_table_flush(rk_domain,
+		       rk_domain->dt_dma + dte_index * sizeof(u32), 1);
 done:
 	pt_phys = rk_dte_pt_address(dte);
 	return (u32 *)phys_to_virt(pt_phys);
 }
 
 static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
-				  u32 *pte_addr, dma_addr_t iova, size_t size)
+				  u32 *pte_addr, dma_addr_t pte_dma,
+				  size_t size)
 {
 	unsigned int pte_count;
 	unsigned int pte_total = size / SPAGE_SIZE;
@@ -645,14 +654,14 @@ static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
 		pte_addr[pte_count] = rk_mk_pte_invalid(pte);
 	}
 
-	rk_table_flush(pte_addr, pte_count);
+	rk_table_flush(rk_domain, pte_dma, pte_count);
 
 	return pte_count * SPAGE_SIZE;
 }
 
 static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
-			     dma_addr_t iova, phys_addr_t paddr, size_t size,
-			     int prot)
+			     dma_addr_t pte_dma, dma_addr_t iova,
+			     phys_addr_t paddr, size_t size, int prot)
 {
 	unsigned int pte_count;
 	unsigned int pte_total = size / SPAGE_SIZE;
@@ -671,7 +680,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
 		paddr += SPAGE_SIZE;
 	}
 
-	rk_table_flush(pte_addr, pte_count);
+	rk_table_flush(rk_domain, pte_dma, pte_total);
 
 	/*
 	 * Zap the first and last iova to evict from iotlb any previously
@@ -684,7 +693,8 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
 	return 0;
 unwind:
 	/* Unmap the range of iovas that we just mapped */
-	rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE);
+	rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma,
+			    pte_count * SPAGE_SIZE);
 
 	iova += pte_count * SPAGE_SIZE;
 	page_phys = rk_pte_page_address(pte_addr[pte_count]);
@@ -699,8 +709,9 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
 {
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
-	dma_addr_t iova = (dma_addr_t)_iova;
+	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
 	u32 *page_table, *pte_addr;
+	u32 dte_index, pte_index;
 	int ret;
 
 	spin_lock_irqsave(&rk_domain->dt_lock, flags);
@@ -718,8 +729,13 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
 		return PTR_ERR(page_table);
 	}
 
-	pte_addr = &page_table[rk_iova_pte_index(iova)];
-	ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot);
+	dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
+	pte_index = rk_iova_pte_index(iova);
+	pte_addr = &page_table[pte_index];
+	pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32);
+	ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
+				paddr, size, prot);
+
 	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
 
 	return ret;
@@ -730,7 +746,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
 {
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
-	dma_addr_t iova = (dma_addr_t)_iova;
+	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
 	phys_addr_t pt_phys;
 	u32 dte;
 	u32 *pte_addr;
@@ -754,7 +770,8 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
 
 	pt_phys = rk_dte_pt_address(dte);
 	pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
-	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size);
+	pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
+	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);
 
 	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
 
@@ -787,7 +804,6 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
 	int ret, i;
-	phys_addr_t dte_addr;
 
 	/*
 	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
@@ -807,14 +823,14 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 
 	iommu->domain = domain;
 
-	ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq,
+	ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq,
 			       IRQF_SHARED, dev_name(dev), iommu);
 	if (ret)
 		return ret;
 
-	dte_addr = virt_to_phys(rk_domain->dt);
 	for (i = 0; i < iommu->num_mmu; i++) {
-		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
+			       rk_domain->dt_dma);
 		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
 	}
@@ -860,7 +876,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 	}
 	rk_iommu_disable_stall(iommu);
 
-	devm_free_irq(dev, iommu->irq, iommu);
+	devm_free_irq(iommu->dev, iommu->irq, iommu);
 
 	iommu->domain = NULL;
 
@@ -870,14 +886,30 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 {
 	struct rk_iommu_domain *rk_domain;
+	struct platform_device *pdev;
+	struct device *iommu_dev;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 		return NULL;
 
-	rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
-	if (!rk_domain)
+	/* Register a pdev per domain, so DMA API can base on this *dev
+	 * even some virtual master doesn't have an iommu slave
+	 */
+	pdev = platform_device_register_simple("rk_iommu_domain",
+					       PLATFORM_DEVID_AUTO, NULL, 0);
+	if (IS_ERR(pdev))
 		return NULL;
 
+	rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL);
+	if (!rk_domain)
+		goto err_unreg_pdev;
+
+	rk_domain->pdev = pdev;
+
+	if (type == IOMMU_DOMAIN_DMA &&
+	    iommu_get_dma_cookie(&rk_domain->domain))
+		goto err_unreg_pdev;
+
 	/*
 	 * rk32xx iommus use a 2 level pagetable.
 	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
@@ -885,18 +917,36 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 	 */
 	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
 	if (!rk_domain->dt)
-		goto err_dt;
+		goto err_put_cookie;
+
+	iommu_dev = &pdev->dev;
+	rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt,
+					   SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) {
+		dev_err(iommu_dev, "DMA map error for DT\n");
+		goto err_free_dt;
+	}
 
-	rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES);
+	rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES);
 
 	spin_lock_init(&rk_domain->iommus_lock);
 	spin_lock_init(&rk_domain->dt_lock);
 	INIT_LIST_HEAD(&rk_domain->iommus);
 
+	rk_domain->domain.geometry.aperture_start = 0;
+	rk_domain->domain.geometry.aperture_end   = DMA_BIT_MASK(32);
+	rk_domain->domain.geometry.force_aperture = true;
+
 	return &rk_domain->domain;
 
-err_dt:
-	kfree(rk_domain);
+err_free_dt:
+	free_page((unsigned long)rk_domain->dt);
+err_put_cookie:
+	if (type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(&rk_domain->domain);
+err_unreg_pdev:
+	platform_device_unregister(pdev);
+
 	return NULL;
 }
 
@@ -912,12 +962,20 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
 		if (rk_dte_is_pt_valid(dte)) {
 			phys_addr_t pt_phys = rk_dte_pt_address(dte);
 			u32 *page_table = phys_to_virt(pt_phys);
+			dma_unmap_single(&rk_domain->pdev->dev, pt_phys,
+					 SPAGE_SIZE, DMA_TO_DEVICE);
 			free_page((unsigned long)page_table);
 		}
 	}
 
+	dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
 	free_page((unsigned long)rk_domain->dt);
-	kfree(rk_domain);
+
+	if (domain->type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(&rk_domain->domain);
+
+	platform_device_unregister(rk_domain->pdev);
 }
 
 static bool rk_iommu_is_dev_iommu_master(struct device *dev)
@@ -1022,17 +1080,43 @@ static const struct iommu_ops rk_iommu_ops = {
 	.detach_dev = rk_iommu_detach_device,
 	.map = rk_iommu_map,
 	.unmap = rk_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.add_device = rk_iommu_add_device,
 	.remove_device = rk_iommu_remove_device,
 	.iova_to_phys = rk_iommu_iova_to_phys,
 	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
 };
 
+static int rk_iommu_domain_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL);
+	if (!dev->dma_parms)
+		return -ENOMEM;
+
+	/* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
+	arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);
+
+	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
+	dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+
+	return 0;
+}
+
+static struct platform_driver rk_iommu_domain_driver = {
+	.probe = rk_iommu_domain_probe,
+	.driver = {
+		   .name = "rk_iommu_domain",
+	},
+};
+
 static int rk_iommu_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct rk_iommu *iommu;
 	struct resource *res;
+	int num_res = pdev->num_resources;
 	int i;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
@@ -1042,12 +1126,13 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, iommu);
 	iommu->dev = dev;
 	iommu->num_mmu = 0;
-	iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * iommu->num_mmu,
+
+	iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * num_res,
 				    GFP_KERNEL);
 	if (!iommu->bases)
 		return -ENOMEM;
 
-	for (i = 0; i < pdev->num_resources; i++) {
+	for (i = 0; i < num_res; i++) {
 		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
 		if (!res)
 			continue;
@@ -1103,11 +1188,19 @@ static int __init rk_iommu_init(void)
 	if (ret)
 		return ret;
 
-	return platform_driver_register(&rk_iommu_driver);
+	ret = platform_driver_register(&rk_iommu_domain_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&rk_iommu_driver);
+	if (ret)
+		platform_driver_unregister(&rk_iommu_domain_driver);
+	return ret;
 }
 static void __exit rk_iommu_exit(void)
 {
 	platform_driver_unregister(&rk_iommu_driver);
+	platform_driver_unregister(&rk_iommu_domain_driver);
 }
 
 subsys_initcall(rk_iommu_init);
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index beb2841ceae5..3f1ab4986cfc 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -779,11 +779,31 @@ static const struct v4l2_dv_timings_cap adv76xx_timings_cap_digital = {
 			V4L2_DV_BT_CAP_CUSTOM)
 };
 
-static inline const struct v4l2_dv_timings_cap *
-adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd)
+/*
+ * Return the DV timings capabilities for the requested sink pad. As a special
+ * case, pad value -1 returns the capabilities for the currently selected input.
+ */
+static const struct v4l2_dv_timings_cap *
+adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd, int pad)
 {
-	return is_digital_input(sd) ? &adv76xx_timings_cap_digital :
-				      &adv7604_timings_cap_analog;
+	if (pad == -1) {
+		struct adv76xx_state *state = to_state(sd);
+
+		pad = state->selected_input;
+	}
+
+	switch (pad) {
+	case ADV76XX_PAD_HDMI_PORT_A:
+	case ADV7604_PAD_HDMI_PORT_B:
+	case ADV7604_PAD_HDMI_PORT_C:
+	case ADV7604_PAD_HDMI_PORT_D:
+		return &adv76xx_timings_cap_digital;
+
+	case ADV7604_PAD_VGA_RGB:
+	case ADV7604_PAD_VGA_COMP:
+	default:
+		return &adv7604_timings_cap_analog;
+	}
 }
 
 
@@ -1329,7 +1349,7 @@ static int stdi2dv_timings(struct v4l2_subdev *sd,
 		const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt;
 
 		if (!v4l2_valid_dv_timings(&v4l2_dv_timings_presets[i],
-					   adv76xx_get_dv_timings_cap(sd),
+					   adv76xx_get_dv_timings_cap(sd, -1),
 					   adv76xx_check_dv_timings, NULL))
 			continue;
 		if (vtotal(bt) != stdi->lcf + 1)
@@ -1430,18 +1450,22 @@ static int adv76xx_enum_dv_timings(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	return v4l2_enum_dv_timings_cap(timings,
-		adv76xx_get_dv_timings_cap(sd), adv76xx_check_dv_timings, NULL);
+		adv76xx_get_dv_timings_cap(sd, timings->pad),
+		adv76xx_check_dv_timings, NULL);
 }
 
 static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
 			struct v4l2_dv_timings_cap *cap)
 {
 	struct adv76xx_state *state = to_state(sd);
+	unsigned int pad = cap->pad;
 
 	if (cap->pad >= state->source_pad)
 		return -EINVAL;
 
-	*cap = *adv76xx_get_dv_timings_cap(sd);
+	*cap = *adv76xx_get_dv_timings_cap(sd, pad);
+	cap->pad = pad;
+
 	return 0;
 }
 
@@ -1450,9 +1474,9 @@ static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
 static void adv76xx_fill_optional_dv_timings_fields(struct v4l2_subdev *sd,
 		struct v4l2_dv_timings *timings)
 {
-	v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd),
-			is_digital_input(sd) ? 250000 : 1000000,
-			adv76xx_check_dv_timings, NULL);
+	v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd, -1),
+				 is_digital_input(sd) ? 250000 : 1000000,
+				 adv76xx_check_dv_timings, NULL);
 }
 
 static unsigned int adv7604_read_hdmi_pixelclock(struct v4l2_subdev *sd)
@@ -1620,7 +1644,7 @@ static int adv76xx_s_dv_timings(struct v4l2_subdev *sd,
 
 	bt = &timings->bt;
 
-	if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd),
+	if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd, -1),
 				   adv76xx_check_dv_timings, NULL))
 		return -ERANGE;
 
diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
index 87c12930416f..92d9d4214c3a 100644
--- a/drivers/media/usb/airspy/airspy.c
+++ b/drivers/media/usb/airspy/airspy.c
@@ -1072,7 +1072,7 @@ static int airspy_probe(struct usb_interface *intf,
 	if (ret) {
 		dev_err(s->dev, "Failed to register as video device (%d)\n",
 				ret);
-		goto err_unregister_v4l2_dev;
+		goto err_free_controls;
 	}
 	dev_info(s->dev, "Registered as %s\n",
 			video_device_node_name(&s->vdev));
@@ -1081,7 +1081,6 @@ static int airspy_probe(struct usb_interface *intf,
 
 err_free_controls:
 	v4l2_ctrl_handler_free(&s->hdl);
-err_unregister_v4l2_dev:
 	v4l2_device_unregister(&s->v4l2_dev);
 err_free_mem:
 	kfree(s);
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 28e5be2c2eef..528390f33b53 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2171,7 +2171,7 @@ static int v4l_cropcap(const struct v4l2_ioctl_ops *ops,
 	 * The determine_valid_ioctls() call already should ensure
 	 * that this can never happen, but just in case...
 	 */
-	if (WARN_ON(!ops->vidioc_cropcap && !ops->vidioc_cropcap))
+	if (WARN_ON(!ops->vidioc_cropcap && !ops->vidioc_g_selection))
 		return -ENOTTY;
 
 	if (ops->vidioc_cropcap)
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index f6b57579185a..4afbc412f959 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -21,19 +21,50 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <soc/mediatek/smi.h>
+#include <dt-bindings/memory/mt2701-larb-port.h>
 
 #define SMI_LARB_MMU_EN		0xf00
+#define REG_SMI_SECUR_CON_BASE		0x5c0
+
+/* every register control 8 port, register offset 0x4 */
+#define REG_SMI_SECUR_CON_OFFSET(id)	(((id) >> 3) << 2)
+#define REG_SMI_SECUR_CON_ADDR(id)	\
+	(REG_SMI_SECUR_CON_BASE + REG_SMI_SECUR_CON_OFFSET(id))
+
+/*
+ * every port have 4 bit to control, bit[port + 3] control virtual or physical,
+ * bit[port + 2 : port + 1] control the domain, bit[port] control the security
+ * or non-security.
+ */
+#define SMI_SECUR_CON_VAL_MSK(id)	(~(0xf << (((id) & 0x7) << 2)))
+#define SMI_SECUR_CON_VAL_VIRT(id)	BIT((((id) & 0x7) << 2) + 3)
+/* mt2701 domain should be set to 3 */
+#define SMI_SECUR_CON_VAL_DOMAIN(id)	(0x3 << ((((id) & 0x7) << 2) + 1))
+
+struct mtk_smi_larb_gen {
+	int port_in_larb[MTK_LARB_NR_MAX + 1];
+	void (*config_port)(struct device *);
+};
 
 struct mtk_smi {
-	struct device	*dev;
-	struct clk	*clk_apb, *clk_smi;
+	struct device			*dev;
+	struct clk			*clk_apb, *clk_smi;
+	struct clk			*clk_async; /*only needed by mt2701*/
+	void __iomem			*smi_ao_base;
 };
 
 struct mtk_smi_larb { /* larb: local arbiter */
-	struct mtk_smi  smi;
-	void __iomem	*base;
-	struct device	*smi_common_dev;
-	u32		*mmu;
+	struct mtk_smi			smi;
+	void __iomem			*base;
+	struct device			*smi_common_dev;
+	const struct mtk_smi_larb_gen	*larb_gen;
+	int				larbid;
+	u32				*mmu;
+};
+
+enum mtk_smi_gen {
+	MTK_SMI_GEN1,
+	MTK_SMI_GEN2
 };
 
 static int mtk_smi_enable(const struct mtk_smi *smi)
@@ -71,6 +102,7 @@ static void mtk_smi_disable(const struct mtk_smi *smi)
 int mtk_smi_larb_get(struct device *larbdev)
 {
 	struct mtk_smi_larb *larb = dev_get_drvdata(larbdev);
+	const struct mtk_smi_larb_gen *larb_gen = larb->larb_gen;
 	struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev);
 	int ret;
 
@@ -87,7 +119,7 @@ int mtk_smi_larb_get(struct device *larbdev)
 	}
 
 	/* Configure the iommu info for this larb */
-	writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN);
+	larb_gen->config_port(larbdev);
 
 	return 0;
 }
@@ -126,6 +158,45 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data)
 	return -ENODEV;
 }
 
+static void mtk_smi_larb_config_port(struct device *dev)
+{
+	struct mtk_smi_larb *larb = dev_get_drvdata(dev);
+
+	writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN);
+}
+
+
+static void mtk_smi_larb_config_port_gen1(struct device *dev)
+{
+	struct mtk_smi_larb *larb = dev_get_drvdata(dev);
+	const struct mtk_smi_larb_gen *larb_gen = larb->larb_gen;
+	struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev);
+	int i, m4u_port_id, larb_port_num;
+	u32 sec_con_val, reg_val;
+
+	m4u_port_id = larb_gen->port_in_larb[larb->larbid];
+	larb_port_num = larb_gen->port_in_larb[larb->larbid + 1]
+			- larb_gen->port_in_larb[larb->larbid];
+
+	for (i = 0; i < larb_port_num; i++, m4u_port_id++) {
+		if (*larb->mmu & BIT(i)) {
+			/* bit[port + 3] controls the virtual or physical */
+			sec_con_val = SMI_SECUR_CON_VAL_VIRT(m4u_port_id);
+		} else {
+			/* do not need to enable m4u for this port */
+			continue;
+		}
+		reg_val = readl(common->smi_ao_base
+			+ REG_SMI_SECUR_CON_ADDR(m4u_port_id));
+		reg_val &= SMI_SECUR_CON_VAL_MSK(m4u_port_id);
+		reg_val |= sec_con_val;
+		reg_val |= SMI_SECUR_CON_VAL_DOMAIN(m4u_port_id);
+		writel(reg_val,
+			common->smi_ao_base
+			+ REG_SMI_SECUR_CON_ADDR(m4u_port_id));
+	}
+}
+
 static void
 mtk_smi_larb_unbind(struct device *dev, struct device *master, void *data)
 {
@@ -137,6 +208,31 @@ static const struct component_ops mtk_smi_larb_component_ops = {
 	.unbind = mtk_smi_larb_unbind,
 };
 
+static const struct mtk_smi_larb_gen mtk_smi_larb_mt8173 = {
+	/* mt8173 do not need the port in larb */
+	.config_port = mtk_smi_larb_config_port,
+};
+
+static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = {
+	.port_in_larb = {
+		LARB0_PORT_OFFSET, LARB1_PORT_OFFSET,
+		LARB2_PORT_OFFSET, LARB3_PORT_OFFSET
+	},
+	.config_port = mtk_smi_larb_config_port_gen1,
+};
+
+static const struct of_device_id mtk_smi_larb_of_ids[] = {
+	{
+		.compatible = "mediatek,mt8173-smi-larb",
+		.data = &mtk_smi_larb_mt8173
+	},
+	{
+		.compatible = "mediatek,mt2701-smi-larb",
+		.data = &mtk_smi_larb_mt2701
+	},
+	{}
+};
+
 static int mtk_smi_larb_probe(struct platform_device *pdev)
 {
 	struct mtk_smi_larb *larb;
@@ -144,14 +240,20 @@ static int mtk_smi_larb_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *smi_node;
 	struct platform_device *smi_pdev;
+	const struct of_device_id *of_id;
 
 	if (!dev->pm_domain)
 		return -EPROBE_DEFER;
 
+	of_id = of_match_node(mtk_smi_larb_of_ids, pdev->dev.of_node);
+	if (!of_id)
+		return -EINVAL;
+
 	larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL);
 	if (!larb)
 		return -ENOMEM;
 
+	larb->larb_gen = of_id->data;
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	larb->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(larb->base))
@@ -191,24 +293,34 @@ static int mtk_smi_larb_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id mtk_smi_larb_of_ids[] = {
-	{ .compatible = "mediatek,mt8173-smi-larb",},
-	{}
-};
-
 static struct platform_driver mtk_smi_larb_driver = {
 	.probe	= mtk_smi_larb_probe,
-	.remove = mtk_smi_larb_remove,
+	.remove	= mtk_smi_larb_remove,
 	.driver	= {
 		.name = "mtk-smi-larb",
 		.of_match_table = mtk_smi_larb_of_ids,
 	}
 };
 
+static const struct of_device_id mtk_smi_common_of_ids[] = {
+	{
+		.compatible = "mediatek,mt8173-smi-common",
+		.data = (void *)MTK_SMI_GEN2
+	},
+	{
+		.compatible = "mediatek,mt2701-smi-common",
+		.data = (void *)MTK_SMI_GEN1
+	},
+	{}
+};
+
 static int mtk_smi_common_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_smi *common;
+	struct resource *res;
+	const struct of_device_id *of_id;
+	enum mtk_smi_gen smi_gen;
 
 	if (!dev->pm_domain)
 		return -EPROBE_DEFER;
@@ -226,6 +338,29 @@ static int mtk_smi_common_probe(struct platform_device *pdev)
 	if (IS_ERR(common->clk_smi))
 		return PTR_ERR(common->clk_smi);
 
+	of_id = of_match_node(mtk_smi_common_of_ids, pdev->dev.of_node);
+	if (!of_id)
+		return -EINVAL;
+
+	/*
+	 * for mtk smi gen 1, we need to get the ao(always on) base to config
+	 * m4u port, and we need to enable the aync clock for transform the smi
+	 * clock into emi clock domain, but for mtk smi gen2, there's no smi ao
+	 * base.
+	 */
+	smi_gen = (enum mtk_smi_gen)of_id->data;
+	if (smi_gen == MTK_SMI_GEN1) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		common->smi_ao_base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(common->smi_ao_base))
+			return PTR_ERR(common->smi_ao_base);
+
+		common->clk_async = devm_clk_get(dev, "async");
+		if (IS_ERR(common->clk_async))
+			return PTR_ERR(common->clk_async);
+
+		clk_prepare_enable(common->clk_async);
+	}
 	pm_runtime_enable(dev);
 	platform_set_drvdata(pdev, common);
 	return 0;
@@ -237,11 +372,6 @@ static int mtk_smi_common_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id mtk_smi_common_of_ids[] = {
-	{ .compatible = "mediatek,mt8173-smi-common", },
-	{}
-};
-
 static struct platform_driver mtk_smi_common_driver = {
 	.probe	= mtk_smi_common_probe,
 	.remove = mtk_smi_common_remove,
@@ -272,4 +402,5 @@ err_unreg_smi:
 	platform_driver_unregister(&mtk_smi_common_driver);
 	return ret;
 }
+
 subsys_initcall(mtk_smi_init);
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index e62fde3ac431..c5472e3c9231 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -355,8 +355,10 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
 		goto idata_err;
 	}
 
-	if (!idata->buf_bytes)
+	if (!idata->buf_bytes) {
+		idata->buf = NULL;
 		return idata;
+	}
 
 	idata->buf = kmalloc(idata->buf_bytes, GFP_KERNEL);
 	if (!idata->buf) {
@@ -1786,8 +1788,8 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
 
 	packed_cmd_hdr = packed->cmd_hdr;
 	memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr));
-	packed_cmd_hdr[0] = (packed->nr_entries << 16) |
-		(PACKED_CMD_WR << 8) | PACKED_CMD_VER;
+	packed_cmd_hdr[0] = cpu_to_le32((packed->nr_entries << 16) |
+		(PACKED_CMD_WR << 8) | PACKED_CMD_VER);
 	hdr_blocks = mmc_large_sector(card) ? 8 : 1;
 
 	/*
@@ -1801,14 +1803,14 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
 			((brq->data.blocks * brq->data.blksz) >=
 			 card->ext_csd.data_tag_unit_size);
 		/* Argument of CMD23 */
-		packed_cmd_hdr[(i * 2)] =
+		packed_cmd_hdr[(i * 2)] = cpu_to_le32(
 			(do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) |
 			(do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) |
-			blk_rq_sectors(prq);
+			blk_rq_sectors(prq));
 		/* Argument of CMD18 or CMD25 */
-		packed_cmd_hdr[((i * 2)) + 1] =
+		packed_cmd_hdr[((i * 2)) + 1] = cpu_to_le32(
 			mmc_card_blockaddr(card) ?
-			blk_rq_pos(prq) : blk_rq_pos(prq) << 9;
+			blk_rq_pos(prq) : blk_rq_pos(prq) << 9);
 		packed->blocks += blk_rq_sectors(prq);
 		i++;
 	}
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 86fac3e86833..c763b404510f 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -789,14 +789,16 @@ static int pxamci_probe(struct platform_device *pdev)
 		gpio_direction_output(gpio_power,
 				      host->pdata->gpio_power_invert);
 	}
-	if (gpio_is_valid(gpio_ro))
+	if (gpio_is_valid(gpio_ro)) {
 		ret = mmc_gpio_request_ro(mmc, gpio_ro);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro);
-		goto out;
-	} else {
-		mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
-			0 : MMC_CAP2_RO_ACTIVE_HIGH;
+		if (ret) {
+			dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n",
+				gpio_ro);
+			goto out;
+		} else {
+			mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
+				0 : MMC_CAP2_RO_ACTIVE_HIGH;
+		}
 	}
 
 	if (gpio_is_valid(gpio_cd))
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 08e158895635..a136da8df6fe 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1657,8 +1657,11 @@ static int omap_get_dt_info(struct device *dev, struct omap_nand_info *info)
 
 	/* detect availability of ELM module. Won't be present pre-OMAP4 */
 	info->elm_of_node = of_parse_phandle(child, "ti,elm-id", 0);
-	if (!info->elm_of_node)
-		dev_dbg(dev, "ti,elm-id not in DT\n");
+	if (!info->elm_of_node) {
+		info->elm_of_node = of_parse_phandle(child, "elm_id", 0);
+		if (!info->elm_of_node)
+			dev_dbg(dev, "ti,elm-id not in DT\n");
+	}
 
 	/* select ecc-scheme for NAND */
 	if (of_property_read_string(child, "ti,nand-ecc-opt", &s)) {
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index db760e84119f..b8df0f5e8c25 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
 	if (err < 0)
 		return err;
 
-	return register_netdevice(bond_dev);
+	err = register_netdevice(bond_dev);
+
+	netif_carrier_off(bond_dev);
+
+	return err;
 }
 
 static size_t bond_get_size(const struct net_device *bond_dev)
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 30defe6c81f2..821d86c38ab2 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3851,7 +3851,7 @@ static void et131x_tx_timeout(struct net_device *netdev)
 	unsigned long flags;
 
 	/* If the device is closed, ignore the timeout */
-	if (~(adapter->flags & FMP_ADAPTER_INTERRUPT_IN_USE))
+	if (!(adapter->flags & FMP_ADAPTER_INTERRUPT_IN_USE))
 		return;
 
 	/* Any nonrecoverable hardware error?
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 08a23e6b60e9..1a3555d03a96 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -259,6 +259,7 @@ static void nb8800_receive(struct net_device *dev, unsigned int i,
 		if (err) {
 			netdev_err(dev, "rx buffer allocation failed\n");
 			dev->stats.rx_dropped++;
+			dev_kfree_skb(skb);
 			return;
 		}
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index a6333d38ecc0..25bbae5928d4 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -231,7 +231,7 @@ err_dma:
 	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
 			 DMA_TO_DEVICE);
 
-	while (i > 0) {
+	while (i-- > 0) {
 		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[index];
 		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a38cb047b540..1b0ae4a72e9e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1591,7 +1591,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev,
 {
 	struct bnxt *bp = netdev_priv(dev);
 	u16  start = eeprom->offset, length = eeprom->len;
-	int rc;
+	int rc = 0;
 
 	memset(data, 0, eeprom->len);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8de79ae63231..0e7e7da8d201 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2821,7 +2821,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (!g) {
 			netif_info(lio, tx_err, lio->netdev,
 				   "Transmit scatter gather: glist null!\n");
-			goto lio_xmit_failed;
+			goto lio_xmit_dma_failed;
 		}
 
 		cmdsetup.s.gather = 1;
@@ -2892,7 +2892,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	else
 		status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
 	if (status == IQ_SEND_FAILED)
-		goto lio_xmit_failed;
+		goto lio_xmit_dma_failed;
 
 	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
 
@@ -2906,12 +2906,13 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	return NETDEV_TX_OK;
 
+lio_xmit_dma_failed:
+	dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
+			 ndata.datasize, DMA_TO_DEVICE);
 lio_xmit_failed:
 	stats->tx_dropped++;
 	netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
 		   iq_no, stats->tx_dropped);
-	dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
-			 ndata.datasize, DMA_TO_DEVICE);
 	recv_buffer_free(skb);
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 4edb98c3c6c7..4466a1187110 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -860,6 +860,11 @@ static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned int entry;
 	void *dest;
 
+	if (skb_put_padto(skb, ETHOC_ZLEN)) {
+		dev->stats.tx_errors++;
+		goto out_no_free;
+	}
+
 	if (unlikely(skb->len > ETHOC_BUFSIZ)) {
 		dev->stats.tx_errors++;
 		goto out;
@@ -894,6 +899,7 @@ static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_tx_timestamp(skb);
 out:
 	dev_kfree_skb(skb);
+out_no_free:
 	return NETDEV_TX_OK;
 }
 
@@ -1086,7 +1092,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	if (!priv->iobase) {
 		dev_err(&pdev->dev, "cannot remap I/O memory space\n");
 		ret = -ENXIO;
-		goto error;
+		goto free;
 	}
 
 	if (netdev->mem_end) {
@@ -1095,7 +1101,7 @@ static int ethoc_probe(struct platform_device *pdev)
 		if (!priv->membase) {
 			dev_err(&pdev->dev, "cannot remap memory space\n");
 			ret = -ENXIO;
-			goto error;
+			goto free;
 		}
 	} else {
 		/* Allocate buffer memory */
@@ -1106,7 +1112,7 @@ static int ethoc_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "cannot allocate %dB buffer\n",
 				buffer_size);
 			ret = -ENOMEM;
-			goto error;
+			goto free;
 		}
 		netdev->mem_end = netdev->mem_start + buffer_size;
 		priv->dma_alloc = buffer_size;
@@ -1120,7 +1126,7 @@ static int ethoc_probe(struct platform_device *pdev)
 		128, (netdev->mem_end - netdev->mem_start + 1) / ETHOC_BUFSIZ);
 	if (num_bd < 4) {
 		ret = -ENODEV;
-		goto error;
+		goto free;
 	}
 	priv->num_bd = num_bd;
 	/* num_tx must be a power of two */
@@ -1133,7 +1139,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	priv->vma = devm_kzalloc(&pdev->dev, num_bd*sizeof(void *), GFP_KERNEL);
 	if (!priv->vma) {
 		ret = -ENOMEM;
-		goto error;
+		goto free;
 	}
 
 	/* Allow the platform setup code to pass in a MAC address. */
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 06f031715b57..9b7a3f5a2818 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -285,6 +285,7 @@ static void nps_enet_hw_reset(struct net_device *ndev)
 	ge_rst_value |= NPS_ENET_ENABLE << RST_GMAC_0_SHIFT;
 	nps_enet_reg_set(priv, NPS_ENET_REG_GE_RST, ge_rst_value);
 	usleep_range(10, 20);
+	ge_rst_value = 0;
 	nps_enet_reg_set(priv, NPS_ENET_REG_GE_RST, ge_rst_value);
 
 	/* Tx fifo reset sequence */
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ecdb6854a898..88f3c85fb04a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -75,6 +75,7 @@
 #include <linux/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/seq_file.h>
+#include <linux/workqueue.h>
 
 #include "ibmvnic.h"
 
@@ -89,6 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
 static int ibmvnic_remove(struct vio_dev *);
 static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -469,7 +471,8 @@ static int ibmvnic_open(struct net_device *netdev)
 	crq.logical_link_state.link_state = IBMVNIC_LOGICAL_LNK_UP;
 	ibmvnic_send_crq(adapter, &crq);
 
-	netif_start_queue(netdev);
+	netif_tx_start_all_queues(netdev);
+
 	return 0;
 
 bounce_map_failed:
@@ -519,7 +522,7 @@ static int ibmvnic_close(struct net_device *netdev)
 	for (i = 0; i < adapter->req_rx_queues; i++)
 		napi_disable(&adapter->napi[i]);
 
-	netif_stop_queue(netdev);
+	netif_tx_stop_all_queues(netdev);
 
 	if (adapter->bounce_buffer) {
 		if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
@@ -1212,12 +1215,6 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 		goto reg_failed;
 	}
 
-	scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
-	if (scrq->irq == NO_IRQ) {
-		dev_err(dev, "Error mapping irq\n");
-		goto map_irq_failed;
-	}
-
 	scrq->adapter = adapter;
 	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
 	scrq->cur = 0;
@@ -1230,12 +1227,6 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	return scrq;
 
-map_irq_failed:
-	do {
-		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
-					adapter->vdev->unit_address,
-					scrq->crq_num);
-	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 reg_failed:
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
@@ -1256,6 +1247,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 			if (adapter->tx_scrq[i]) {
 				free_irq(adapter->tx_scrq[i]->irq,
 					 adapter->tx_scrq[i]);
+				irq_dispose_mapping(adapter->tx_scrq[i]->irq);
 				release_sub_crq_queue(adapter,
 						      adapter->tx_scrq[i]);
 			}
@@ -1267,6 +1259,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 			if (adapter->rx_scrq[i]) {
 				free_irq(adapter->rx_scrq[i]->irq,
 					 adapter->rx_scrq[i]);
+				irq_dispose_mapping(adapter->rx_scrq[i]->irq);
 				release_sub_crq_queue(adapter,
 						      adapter->rx_scrq[i]);
 			}
@@ -1276,6 +1269,29 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 	adapter->requested_caps = 0;
 }
 
+static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (adapter->tx_scrq) {
+		for (i = 0; i < adapter->req_tx_queues; i++)
+			if (adapter->tx_scrq[i])
+				release_sub_crq_queue(adapter,
+						      adapter->tx_scrq[i]);
+		adapter->tx_scrq = NULL;
+	}
+
+	if (adapter->rx_scrq) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			if (adapter->rx_scrq[i])
+				release_sub_crq_queue(adapter,
+						      adapter->rx_scrq[i]);
+		adapter->rx_scrq = NULL;
+	}
+
+	adapter->requested_caps = 0;
+}
+
 static int disable_scrq_irq(struct ibmvnic_adapter *adapter,
 			    struct ibmvnic_sub_crq_queue *scrq)
 {
@@ -1395,6 +1411,66 @@ static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
 	return IRQ_HANDLED;
 }
 
+static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_sub_crq_queue *scrq;
+	int i = 0, j = 0;
+	int rc = 0;
+
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		scrq = adapter->tx_scrq[i];
+		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
+
+		if (scrq->irq == NO_IRQ) {
+			rc = -EINVAL;
+			dev_err(dev, "Error mapping irq\n");
+			goto req_tx_irq_failed;
+		}
+
+		rc = request_irq(scrq->irq, ibmvnic_interrupt_tx,
+				 0, "ibmvnic_tx", scrq);
+
+		if (rc) {
+			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
+				scrq->irq, rc);
+			irq_dispose_mapping(scrq->irq);
+			goto req_rx_irq_failed;
+		}
+	}
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		scrq = adapter->rx_scrq[i];
+		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
+		if (scrq->irq == NO_IRQ) {
+			rc = -EINVAL;
+			dev_err(dev, "Error mapping irq\n");
+			goto req_rx_irq_failed;
+		}
+		rc = request_irq(scrq->irq, ibmvnic_interrupt_rx,
+				 0, "ibmvnic_rx", scrq);
+		if (rc) {
+			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
+				scrq->irq, rc);
+			irq_dispose_mapping(scrq->irq);
+			goto req_rx_irq_failed;
+		}
+	}
+	return rc;
+
+req_rx_irq_failed:
+	for (j = 0; j < i; j++)
+		free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
+		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+	i = adapter->req_tx_queues;
+req_tx_irq_failed:
+	for (j = 0; j < i; j++)
+		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
+		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+	release_sub_crqs_no_irqs(adapter);
+	return rc;
+}
+
 static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 {
 	struct device *dev = &adapter->vdev->dev;
@@ -1403,8 +1479,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	union ibmvnic_crq crq;
 	int total_queues;
 	int more = 0;
-	int i, j;
-	int rc;
+	int i;
 
 	if (!retry) {
 		/* Sub-CRQ entries are 32 byte long */
@@ -1483,13 +1558,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	for (i = 0; i < adapter->req_tx_queues; i++) {
 		adapter->tx_scrq[i] = allqueues[i];
 		adapter->tx_scrq[i]->pool_index = i;
-		rc = request_irq(adapter->tx_scrq[i]->irq, ibmvnic_interrupt_tx,
-				 0, "ibmvnic_tx", adapter->tx_scrq[i]);
-		if (rc) {
-			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
-				adapter->tx_scrq[i]->irq, rc);
-			goto req_tx_irq_failed;
-		}
 	}
 
 	adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -1500,13 +1568,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
 		adapter->rx_scrq[i]->scrq_num = i;
-		rc = request_irq(adapter->rx_scrq[i]->irq, ibmvnic_interrupt_rx,
-				 0, "ibmvnic_rx", adapter->rx_scrq[i]);
-		if (rc) {
-			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
-				adapter->rx_scrq[i]->irq, rc);
-			goto req_rx_irq_failed;
-		}
 	}
 
 	memset(&crq, 0, sizeof(crq));
@@ -1559,15 +1620,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 
 	return;
 
-req_rx_irq_failed:
-	for (j = 0; j < i; j++)
-		free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
-	i = adapter->req_tx_queues;
-req_tx_irq_failed:
-	for (j = 0; j < i; j++)
-		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
-	kfree(adapter->rx_scrq);
-	adapter->rx_scrq = NULL;
 rx_failed:
 	kfree(adapter->tx_scrq);
 	adapter->tx_scrq = NULL;
@@ -2348,9 +2400,9 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
 			 *req_value,
 			 (long int)be32_to_cpu(crq->request_capability_rsp.
 					       number), name);
-		release_sub_crqs(adapter);
+		release_sub_crqs_no_irqs(adapter);
 		*req_value = be32_to_cpu(crq->request_capability_rsp.number);
-		complete(&adapter->init_done);
+		init_sub_crqs(adapter, 1);
 		return;
 	default:
 		dev_err(dev, "Error %d in request cap rsp\n",
@@ -2659,7 +2711,7 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
 
 out:
 	if (atomic_read(&adapter->running_cap_queries) == 0)
-		complete(&adapter->init_done);
+		init_sub_crqs(adapter, 0);
 		/* We're done querying the capabilities, initialize sub-crqs */
 }
 
@@ -3202,8 +3254,8 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			dev_info(dev, "Partner initialized\n");
 			/* Send back a response */
 			rc = ibmvnic_send_crq_init_complete(adapter);
-			if (rc == 0)
-				send_version_xchg(adapter);
+			if (!rc)
+				schedule_work(&adapter->vnic_crq_init);
 			else
 				dev_err(dev, "Can't send initrsp rc=%ld\n", rc);
 			break;
@@ -3555,8 +3607,63 @@ static const struct file_operations ibmvnic_dump_ops = {
 	.release        = single_release,
 };
 
+static void handle_crq_init_rsp(struct work_struct *work)
+{
+	struct ibmvnic_adapter *adapter = container_of(work,
+						       struct ibmvnic_adapter,
+						       vnic_crq_init);
+	struct device *dev = &adapter->vdev->dev;
+	struct net_device *netdev = adapter->netdev;
+	unsigned long timeout = msecs_to_jiffies(30000);
+	int rc;
+
+	send_version_xchg(adapter);
+	reinit_completion(&adapter->init_done);
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+		dev_err(dev, "Passive init timeout\n");
+		goto task_failed;
+	}
+
+	do {
+		if (adapter->renegotiate) {
+			adapter->renegotiate = false;
+			release_sub_crqs_no_irqs(adapter);
+			send_cap_queries(adapter);
+
+			reinit_completion(&adapter->init_done);
+			if (!wait_for_completion_timeout(&adapter->init_done,
+							 timeout)) {
+				dev_err(dev, "Passive init timeout\n");
+				goto task_failed;
+			}
+		}
+	} while (adapter->renegotiate);
+	rc = init_sub_crq_irqs(adapter);
+
+	if (rc)
+		goto task_failed;
+
+	netdev->real_num_tx_queues = adapter->req_tx_queues;
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		dev_err(dev,
+			"failed to register netdev rc=%d\n", rc);
+		goto register_failed;
+	}
+	dev_info(dev, "ibmvnic registered\n");
+
+	return;
+
+register_failed:
+	release_sub_crqs(adapter);
+task_failed:
+	dev_err(dev, "Passive initialization was not successful\n");
+}
+
 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 {
+	unsigned long timeout = msecs_to_jiffies(30000);
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
 	unsigned char *mac_addr_p;
@@ -3593,6 +3700,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
+	INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
+
 	spin_lock_init(&adapter->stats_lock);
 
 	rc = ibmvnic_init_crq_queue(adapter);
@@ -3635,30 +3744,26 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	ibmvnic_send_crq_init(adapter);
 
 	init_completion(&adapter->init_done);
-	wait_for_completion(&adapter->init_done);
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout))
+		return 0;
 
 	do {
-		adapter->renegotiate = false;
-
-		init_sub_crqs(adapter, 0);
-		reinit_completion(&adapter->init_done);
-		wait_for_completion(&adapter->init_done);
-
 		if (adapter->renegotiate) {
-			release_sub_crqs(adapter);
+			adapter->renegotiate = false;
+			release_sub_crqs_no_irqs(adapter);
 			send_cap_queries(adapter);
 
 			reinit_completion(&adapter->init_done);
-			wait_for_completion(&adapter->init_done);
+			if (!wait_for_completion_timeout(&adapter->init_done,
+							 timeout))
+				return 0;
 		}
 	} while (adapter->renegotiate);
 
-	/* if init_sub_crqs is partially successful, retry */
-	while (!adapter->tx_scrq || !adapter->rx_scrq) {
-		init_sub_crqs(adapter, 1);
-
-		reinit_completion(&adapter->init_done);
-		wait_for_completion(&adapter->init_done);
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(&dev->dev, "failed to initialize sub crq irqs\n");
+		goto free_debugfs;
 	}
 
 	netdev->real_num_tx_queues = adapter->req_tx_queues;
@@ -3666,12 +3771,14 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	rc = register_netdev(netdev);
 	if (rc) {
 		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
-		goto free_debugfs;
+		goto free_sub_crqs;
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
 	return 0;
 
+free_sub_crqs:
+	release_sub_crqs(adapter);
 free_debugfs:
 	if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
 		debugfs_remove_recursive(adapter->debugfs_dir);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 0b66a506a4e4..e82898fd518e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1045,4 +1045,6 @@ struct ibmvnic_adapter {
 	u64 opt_rxba_entries_per_subcrq;
 	__be64 tx_rx_desc_req;
 	u8 map_id;
+
+	struct work_struct vnic_crq_init;
 };
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5ea22008d721..501f15d9f4d6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1344,6 +1344,13 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 	if (!vsi || !macaddr)
 		return NULL;
 
+	/* Do not allow broadcast filter to be added since broadcast filter
+	 * is added as part of add VSI for any newly created VSI except
+	 * FDIR VSI
+	 */
+	if (is_broadcast_ether_addr(macaddr))
+		return NULL;
+
 	f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
 	if (!f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
@@ -2151,18 +2158,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 					 aq_ret, pf->hw.aq.asq_last_status);
 			}
 		}
-		aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw,
-						   vsi->seid,
-						   cur_promisc, NULL);
-		if (aq_ret) {
-			retval = i40e_aq_rc_to_posix(aq_ret,
-						     pf->hw.aq.asq_last_status);
-			dev_info(&pf->pdev->dev,
-				 "set brdcast promisc failed, err %s, aq_err %s\n",
-				 i40e_stat_str(&pf->hw, aq_ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
-		}
 	}
 out:
 	/* if something went wrong then set the changed flag so we try again */
@@ -7726,10 +7721,11 @@ static int i40e_init_msix(struct i40e_pf *pf)
  * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
  * @vsi: the VSI being configured
  * @v_idx: index of the vector in the vsi struct
+ * @cpu: cpu to be used on affinity_mask
  *
  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
  **/
-static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
 {
 	struct i40e_q_vector *q_vector;
 
@@ -7740,7 +7736,8 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 
 	q_vector->vsi = vsi;
 	q_vector->v_idx = v_idx;
-	cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+	cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+
 	if (vsi->netdev)
 		netif_napi_add(vsi->netdev, &q_vector->napi,
 			       i40e_napi_poll, NAPI_POLL_WEIGHT);
@@ -7764,8 +7761,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
-	int v_idx, num_q_vectors;
-	int err;
+	int err, v_idx, num_q_vectors, current_cpu;
 
 	/* if not MSIX, give the one vector only to the LAN VSI */
 	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -7775,10 +7771,15 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
 	else
 		return -EINVAL;
 
+	current_cpu = cpumask_first(cpu_online_mask);
+
 	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
+		err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu);
 		if (err)
 			goto err_out;
+		current_cpu = cpumask_next(current_cpu, cpu_online_mask);
+		if (unlikely(current_cpu >= nr_cpu_ids))
+			current_cpu = cpumask_first(cpu_online_mask);
 	}
 
 	return 0;
@@ -9224,6 +9225,7 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
 static int i40e_add_vsi(struct i40e_vsi *vsi)
 {
 	int ret = -ENODEV;
+	i40e_status aq_ret = 0;
 	u8 laa_macaddr[ETH_ALEN];
 	bool found_laa_mac_filter = false;
 	struct i40e_pf *pf = vsi->back;
@@ -9413,6 +9415,18 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		vsi->seid = ctxt.seid;
 		vsi->id = ctxt.vsi_number;
 	}
+	/* Except FDIR VSI, for all othet VSI set the broadcast filter */
+	if (vsi->type != I40E_VSI_FDIR) {
+		aq_ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL);
+		if (aq_ret) {
+			ret = i40e_aq_rc_to_posix(aq_ret,
+						  hw->aq.asq_last_status);
+			dev_info(&pf->pdev->dev,
+				 "set brdcast promisc failed, err %s, aq_err %s\n",
+				 i40e_stat_str(hw, aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+	}
 
 	spin_lock_bh(&vsi->mac_filter_list_lock);
 	/* If macvlan filters already exist, force them to get loaded */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 55f151fca1dc..a8868e1bf832 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1280,8 +1280,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 				    union i40e_rx_desc *rx_desc)
 {
 	struct i40e_rx_ptype_decoded decoded;
-	bool ipv4, ipv6, tunnel = false;
 	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
 	u8 ptype;
 	u64 qword;
 
@@ -1336,19 +1336,23 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
 		return;
 
-	/* The hardware supported by this driver does not validate outer
-	 * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
-	 * with it but the specification states that you "MAY validate", it
-	 * doesn't make it a hard requirement so if we have validated the
-	 * inner checksum report CHECKSUM_UNNECESSARY.
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
 	 */
-	if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
-				  I40E_RX_PTYPE_INNER_PROT_UDP |
-				  I40E_RX_PTYPE_INNER_PROT_SCTP))
-		tunnel = true;
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	skb->csum_level = tunnel ? 1 : 0;
+	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		/* fall though */
+	default:
+		break;
+	}
 
 	return;
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index be99189da925..79d99cd91b24 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -752,8 +752,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 				    union i40e_rx_desc *rx_desc)
 {
 	struct i40e_rx_ptype_decoded decoded;
-	bool ipv4, ipv6, tunnel = false;
 	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
 	u8 ptype;
 	u64 qword;
 
@@ -808,19 +808,23 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
 		return;
 
-	/* The hardware supported by this driver does not validate outer
-	 * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
-	 * with it but the specification states that you "MAY validate", it
-	 * doesn't make it a hard requirement so if we have validated the
-	 * inner checksum report CHECKSUM_UNNECESSARY.
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
 	 */
-	if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
-				  I40E_RX_PTYPE_INNER_PROT_UDP |
-				  I40E_RX_PTYPE_INNER_PROT_SCTP))
-		tunnel = true;
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	skb->csum_level = tunnel ? 1 : 0;
+	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		/* fall though */
+	default:
+		break;
+	}
 
 	return;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 088c47cf27d9..8bebd862a54c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2887,7 +2887,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
 	if (!test_bit(__IXGBE_DOWN, &adapter->state))
 		ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx));
 
-	return 0;
+	return min(work_done, budget - 1);
 }
 
 /**
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d5d263bda333..f92018b13d28 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -244,7 +244,7 @@
 /* Various constants */
 
 /* Coalescing */
-#define MVNETA_TXDONE_COAL_PKTS		1
+#define MVNETA_TXDONE_COAL_PKTS		0	/* interrupt per packet */
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index fc95affaf76b..44cf16d01f42 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1042,6 +1042,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	u32 rx_size, tx_size;
 	int port_up = 0;
 	int err = 0;
@@ -1061,22 +1063,25 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 	    tx_size == priv->tx_ring[0]->size)
 		return 0;
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.tx_ring_size = tx_size;
+	new_prof.rx_ring_size = rx_size;
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
-	priv->prof->tx_ring_size = tx_size;
-	priv->prof->rx_ring_size = rx_size;
+	mlx4_en_safe_replace_resources(priv, tmp);
 
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
 	if (port_up) {
 		err = mlx4_en_start_port(dev);
 		if (err)
@@ -1084,8 +1089,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 	}
 
 	err = mlx4_en_moderation_update(priv);
-
 out:
+	kfree(tmp);
 	mutex_unlock(&mdev->state_lock);
 	return err;
 }
@@ -1714,6 +1719,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	int port_up = 0;
 	int err = 0;
 
@@ -1723,23 +1730,26 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	    !channel->tx_count || !channel->rx_count)
 		return -EINVAL;
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.num_tx_rings_p_up = channel->tx_count;
+	new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
+	new_prof.rx_ring_num = channel->rx_count;
+
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
-	priv->num_tx_rings_p_up = channel->tx_count;
-	priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
-	priv->rx_ring_num = channel->rx_count;
-
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
+	mlx4_en_safe_replace_resources(priv, tmp);
 
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
@@ -1757,8 +1767,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	}
 
 	err = mlx4_en_moderation_update(priv);
-
 out:
+	kfree(tmp);
 	mutex_unlock(&mdev->state_lock);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0c0dfd6cdca6..8359e9e51b3b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1954,7 +1954,7 @@ static int mlx4_en_close(struct net_device *dev)
 	return 0;
 }
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv)
+static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 {
 	int i;
 
@@ -1979,7 +1979,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 
 }
 
-int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
+static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
@@ -2044,6 +2044,77 @@ static void mlx4_en_shutdown(struct net_device *dev)
 	rtnl_unlock();
 }
 
+static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
+			     struct mlx4_en_priv *src,
+			     struct mlx4_en_port_profile *prof)
+{
+	memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
+	       sizeof(dst->hwtstamp_config));
+	dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
+	dst->tx_ring_num = prof->tx_ring_num;
+	dst->rx_ring_num = prof->rx_ring_num;
+	dst->flags = prof->flags;
+	dst->mdev = src->mdev;
+	dst->port = src->port;
+	dst->dev = src->dev;
+	dst->prof = prof;
+	dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+					 DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+
+	dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
+				GFP_KERNEL);
+	if (!dst->tx_ring)
+		return -ENOMEM;
+
+	dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
+			      GFP_KERNEL);
+	if (!dst->tx_cq) {
+		kfree(dst->tx_ring);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
+				struct mlx4_en_priv *src)
+{
+	memcpy(dst->rx_ring, src->rx_ring,
+	       sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
+	memcpy(dst->rx_cq, src->rx_cq,
+	       sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
+	memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
+	       sizeof(dst->hwtstamp_config));
+	dst->tx_ring_num = src->tx_ring_num;
+	dst->rx_ring_num = src->rx_ring_num;
+	dst->tx_ring = src->tx_ring;
+	dst->tx_cq = src->tx_cq;
+	memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
+}
+
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+				struct mlx4_en_priv *tmp,
+				struct mlx4_en_port_profile *prof)
+{
+	mlx4_en_copy_priv(tmp, priv, prof);
+
+	if (mlx4_en_alloc_resources(tmp)) {
+		en_warn(priv,
+			"%s: Resource allocation failed, using previous configuration\n",
+			__func__);
+		kfree(tmp->tx_ring);
+		kfree(tmp->tx_cq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+				    struct mlx4_en_priv *tmp)
+{
+	mlx4_en_free_resources(priv);
+	mlx4_en_update_priv(priv, tmp);
+}
+
 void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2080,6 +2151,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	mdev->upper[priv->port] = NULL;
 	mutex_unlock(&mdev->state_lock);
 
+#ifdef CONFIG_RFS_ACCEL
+	mlx4_en_cleanup_filters(priv);
+#endif
+
 	mlx4_en_free_resources(priv);
 
 	kfree(priv->tx_ring);
@@ -3124,6 +3199,8 @@ int mlx4_en_reset_config(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	int port_up = 0;
 	int err = 0;
 
@@ -3140,19 +3217,29 @@ int mlx4_en_reset_config(struct net_device *dev,
 		return -EINVAL;
 	}
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
+
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
 	en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-		ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX));
+		ts_config.rx_filter,
+		!!(features & NETIF_F_HW_VLAN_CTAG_RX));
 
-	priv->hwtstamp_config.tx_type = ts_config.tx_type;
-	priv->hwtstamp_config.rx_filter = ts_config.rx_filter;
+	mlx4_en_safe_replace_resources(priv, tmp);
 
 	if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
 		if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -3186,11 +3273,6 @@ int mlx4_en_reset_config(struct net_device *dev,
 		dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
 	}
 
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
 	if (port_up) {
 		err = mlx4_en_start_port(dev);
 		if (err)
@@ -3199,6 +3281,8 @@ int mlx4_en_reset_config(struct net_device *dev,
 
 out:
 	mutex_unlock(&mdev->state_lock);
-	netdev_features_change(dev);
+	kfree(tmp);
+	if (!err)
+		netdev_features_change(dev);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c1b3a9c8cf3b..99b5407f2278 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -514,9 +514,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 	ring->rx_info = NULL;
 	kfree(ring);
 	*pring = NULL;
-#ifdef CONFIG_RFS_ACCEL
-	mlx4_en_cleanup_filters(priv);
-#endif
 }
 
 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 467d47ed2c39..13d297ee34bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -353,12 +353,14 @@ struct mlx4_en_port_profile {
 	u32 rx_ring_num;
 	u32 tx_ring_size;
 	u32 rx_ring_size;
+	u8 num_tx_rings_p_up;
 	u8 rx_pause;
 	u8 rx_ppp;
 	u8 tx_pause;
 	u8 tx_ppp;
 	int rss_rings;
 	int inline_thold;
+	struct hwtstamp_config hwtstamp_config;
 };
 
 struct mlx4_en_profile {
@@ -623,8 +625,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 			      u8 rx_ppp, u8 rx_pause,
 			      u8 tx_ppp, u8 tx_pause);
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv);
-int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+				struct mlx4_en_priv *tmp,
+				struct mlx4_en_port_profile *prof);
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+				    struct mlx4_en_priv *tmp);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
 		      int entries, int ring, enum cq_type mode, int node);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7a0dca29c642..5a4d88c2cdb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1348,6 +1348,11 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv)
 			goto err_close_channels;
 	}
 
+	/* FIXME: This is a W/A for tx timeout watch dog false alarm when
+	 * polling for inactive tx queues.
+	 */
+	netif_tx_start_all_queues(priv->netdev);
+
 	kfree(cparam);
 	return 0;
 
@@ -1367,6 +1372,12 @@ static void mlx5e_close_channels(struct mlx5e_priv *priv)
 {
 	int i;
 
+	/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
+	 * polling for inactive tx queues.
+	 */
+	netif_tx_stop_all_queues(priv->netdev);
+	netif_tx_disable(priv->netdev);
+
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_close_channel(priv->channel[i]);
 
@@ -2656,7 +2667,7 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 	for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
 		struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
 
-		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+		if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
 			continue;
 		sched_work = true;
 		set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 05de77267d58..e25a73ed2981 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
 	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
 
 	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1977e7a5c530..57d48da709fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2718,7 +2718,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
  * Configures the switch priority to buffer table.
  */
 #define MLXSW_REG_PPTB_ID 0x500B
-#define MLXSW_REG_PPTB_LEN 0x0C
+#define MLXSW_REG_PPTB_LEN 0x10
 
 static const struct mlxsw_reg_info mlxsw_reg_pptb = {
 	.id = MLXSW_REG_PPTB_ID,
@@ -2784,6 +2784,13 @@ MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8);
  */
 MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4);
 
+/* reg_pptb_prio_to_buff_msb
+ * Mapping of switch priority <i+8> to one of the allocated receive port
+ * buffers.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4);
+
 #define MLXSW_REG_PPTB_ALL_PRIO 0xFF
 
 static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
@@ -2792,6 +2799,14 @@ static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
 	mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM);
 	mlxsw_reg_pptb_local_port_set(payload, local_port);
 	mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+	mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+}
+
+static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio,
+						    u8 buff)
+{
+	mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff);
+	mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff);
 }
 
 /* PBMC - Port Buffer Management Control Register
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 660429ebfbe1..374080027b2f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -171,23 +171,6 @@ static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl);
 }
 
-static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port,
-					 bool *p_is_up)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char paos_pl[MLXSW_REG_PAOS_LEN];
-	u8 oper_status;
-	int err;
-
-	mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(paos), paos_pl);
-	if (err)
-		return err;
-	oper_status = mlxsw_reg_paos_oper_status_get(paos_pl);
-	*p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false;
-	return 0;
-}
-
 static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				      unsigned char *addr)
 {
@@ -1434,7 +1417,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev,
 
 	cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) |
 			 mlxsw_sp_from_ptys_supported_link(eth_proto_cap) |
-			 SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+			 SUPPORTED_Pause | SUPPORTED_Asym_Pause |
+			 SUPPORTED_Autoneg;
 	cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin);
 	mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev),
 					eth_proto_oper, cmd);
@@ -1493,7 +1477,6 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev,
 	u32 eth_proto_new;
 	u32 eth_proto_cap;
 	u32 eth_proto_admin;
-	bool is_up;
 	int err;
 
 	speed = ethtool_cmd_speed(cmd);
@@ -1525,12 +1508,7 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev,
 		return err;
 	}
 
-	err = mlxsw_sp_port_oper_status_get(mlxsw_sp_port, &is_up);
-	if (err) {
-		netdev_err(dev, "Failed to get oper status");
-		return err;
-	}
-	if (!is_up)
+	if (!netif_running(dev))
 		return 0;
 
 	err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index a3720a0fad7d..074cdda7b6f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -194,7 +194,7 @@ static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 	mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0);
+		mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0);
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
 			       pptb_pl);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 0b323661c0b6..01cfb7512827 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -103,7 +103,8 @@ static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]);
+		mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]);
+
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
 			       pptb_pl);
 }
@@ -249,6 +250,7 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
 		return err;
 
 	memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets));
+	mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
 
 	return 0;
 }
@@ -351,7 +353,8 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	int err;
 
-	if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) {
+	if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
+	    pfc->pfc_en) {
 		netdev_err(dev, "PAUSE frames already enabled on port\n");
 		return -EINVAL;
 	}
@@ -371,6 +374,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 	}
 
 	memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc));
+	mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
 
 	return 0;
 
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 8dedafa1a95d..a30ee427efab 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2601,8 +2601,6 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
-	put_net(pch->chan_net);
-	pch->chan_net = NULL;
 
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
@@ -3136,6 +3134,9 @@ ppp_disconnect_channel(struct channel *pch)
  */
 static void ppp_destroy_channel(struct channel *pch)
 {
+	put_net(pch->chan_net);
+	pch->chan_net = NULL;
+
 	atomic_dec(&channel_count);
 
 	if (!pch->file.dead) {
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0da72d39b4f9..e9654a685381 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -26,6 +26,7 @@
 #include <linux/mdio.h>
 #include <linux/usb/cdc.h>
 #include <linux/suspend.h>
+#include <linux/acpi.h>
 
 /* Information for net-next */
 #define NETNEXT_VERSION		"08"
@@ -460,6 +461,11 @@
 /* SRAM_IMPEDANCE */
 #define RX_DRIVING_MASK		0x6000
 
+/* MAC PASSTHRU */
+#define AD_MASK			0xfee0
+#define EFUSE			0xcfdb
+#define PASS_THRU_MASK		0x1
+
 enum rtl_register_content {
 	_1000bps	= 0x10,
 	_100bps		= 0x08,
@@ -1036,6 +1042,65 @@ out1:
 	return ret;
 }
 
+/* Devices containing RTL8153-AD can support a persistent
+ * host system provided MAC address.
+ * Examples of this are Dell TB15 and Dell WD15 docks
+ */
+static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	int ret = -EINVAL;
+	u32 ocp_data;
+	unsigned char buf[6];
+
+	/* test for -AD variant of RTL8153 */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
+	if ((ocp_data & AD_MASK) != 0x1000)
+		return -ENODEV;
+
+	/* test for MAC address pass-through bit */
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE);
+	if ((ocp_data & PASS_THRU_MASK) != 1)
+		return -ENODEV;
+
+	/* returns _AUXMAC_#AABBCCDDEEFF# */
+	status = acpi_evaluate_object(NULL, "\\_SB.AMAC", NULL, &buffer);
+	obj = (union acpi_object *)buffer.pointer;
+	if (!ACPI_SUCCESS(status))
+		return -ENODEV;
+	if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid buffer when reading pass-thru MAC addr: "
+			   "(%d, %d)\n",
+			   obj->type, obj->string.length);
+		goto amacout;
+	}
+	if (strncmp(obj->string.pointer, "_AUXMAC_#", 9) != 0 ||
+	    strncmp(obj->string.pointer + 0x15, "#", 1) != 0) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid header when reading pass-thru MAC addr\n");
+		goto amacout;
+	}
+	ret = hex2bin(buf, obj->string.pointer + 9, 6);
+	if (!(ret == 0 && is_valid_ether_addr(buf))) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid MAC when reading pass-thru MAC addr: "
+			   "%d, %pM\n", ret, buf);
+		ret = -EINVAL;
+		goto amacout;
+	}
+	memcpy(sa->sa_data, buf, 6);
+	ether_addr_copy(tp->netdev->dev_addr, sa->sa_data);
+	netif_info(tp, probe, tp->netdev,
+		   "Using pass-thru MAC addr %pM\n", sa->sa_data);
+
+amacout:
+	kfree(obj);
+	return ret;
+}
+
 static int set_ethernet_addr(struct r8152 *tp)
 {
 	struct net_device *dev = tp->netdev;
@@ -1044,8 +1109,15 @@ static int set_ethernet_addr(struct r8152 *tp)
 
 	if (tp->version == RTL_VER_01)
 		ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
-	else
-		ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa.sa_data);
+	else {
+		/* if this is not an RTL8153-AD, no eFuse mac pass thru set,
+		 * or system doesn't provide valid _SB.AMAC this will be
+		 * be expected to non-zero
+		 */
+		ret = vendor_mac_passthru_addr_read(tp, &sa);
+		if (ret < 0)
+			ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa.sa_data);
+	}
 
 	if (ret < 0) {
 		netif_err(tp, probe, dev, "Get ether addr fail\n");
@@ -2296,10 +2368,6 @@ static u32 __rtl_get_wol(struct r8152 *tp)
 	u32 ocp_data;
 	u32 wolopts = 0;
 
-	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG5);
-	if (!(ocp_data & LAN_WAKE_EN))
-		return 0;
-
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
 	if (ocp_data & LINK_ON_WAKE_EN)
 		wolopts |= WAKE_PHY;
@@ -2332,15 +2400,13 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5);
-	ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN | LAN_WAKE_EN);
+	ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN);
 	if (wolopts & WAKE_UCAST)
 		ocp_data |= UWF_EN;
 	if (wolopts & WAKE_BCAST)
 		ocp_data |= BWF_EN;
 	if (wolopts & WAKE_MCAST)
 		ocp_data |= MWF_EN;
-	if (wolopts & WAKE_ANY)
-		ocp_data |= LAN_WAKE_EN;
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data);
 
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
@@ -4359,3 +4425,4 @@ module_usb_driver(rtl8152_driver);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1a51584a382b..d5fb55c0a9d9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1394,19 +1394,22 @@ static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return nsa->ns_id - nsb->ns_id;
 }
 
-static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
-	struct nvme_ns *ns;
-
-	lockdep_assert_held(&ctrl->namespaces_mutex);
+	struct nvme_ns *ns, *ret = NULL;
 
+	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		if (ns->ns_id == nsid)
-			return ns;
+		if (ns->ns_id == nsid) {
+			kref_get(&ns->kref);
+			ret = ns;
+			break;
+		}
 		if (ns->ns_id > nsid)
 			break;
 	}
-	return NULL;
+	mutex_unlock(&ctrl->namespaces_mutex);
+	return ret;
 }
 
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -1415,8 +1418,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct gendisk *disk;
 	int node = dev_to_node(ctrl->dev);
 
-	lockdep_assert_held(&ctrl->namespaces_mutex);
-
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
 		return;
@@ -1457,7 +1458,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
 
-	list_add_tail_rcu(&ns->list, &ctrl->namespaces);
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_add_tail(&ns->list, &ctrl->namespaces);
+	mutex_unlock(&ctrl->namespaces_mutex);
+
 	kref_get(&ctrl->kref);
 	if (ns->type == NVME_NS_LIGHTNVM)
 		return;
@@ -1480,8 +1484,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
@@ -1494,8 +1496,11 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
+
+	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
-	synchronize_rcu();
+	mutex_unlock(&ns->ctrl->namespaces_mutex);
+
 	nvme_put_ns(ns);
 }
 
@@ -1503,10 +1508,11 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
 
-	ns = nvme_find_ns(ctrl, nsid);
+	ns = nvme_find_get_ns(ctrl, nsid);
 	if (ns) {
 		if (revalidate_disk(ns->disk))
 			nvme_ns_remove(ns);
+		nvme_put_ns(ns);
 	} else
 		nvme_alloc_ns(ctrl, nsid);
 }
@@ -1535,9 +1541,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
 			nvme_validate_ns(ctrl, nsid);
 
 			while (++prev < nsid) {
-				ns = nvme_find_ns(ctrl, prev);
-				if (ns)
+				ns = nvme_find_get_ns(ctrl, prev);
+				if (ns) {
 					nvme_ns_remove(ns);
+					nvme_put_ns(ns);
+				}
 			}
 		}
 		nn -= j;
@@ -1552,8 +1560,6 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
 	struct nvme_ns *ns, *next;
 	unsigned i;
 
-	lockdep_assert_held(&ctrl->namespaces_mutex);
-
 	for (i = 1; i <= nn; i++)
 		nvme_validate_ns(ctrl, i);
 
@@ -1576,7 +1582,6 @@ static void nvme_scan_work(struct work_struct *work)
 	if (nvme_identify_ctrl(ctrl, &id))
 		return;
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	nn = le32_to_cpu(id->nn);
 	if (ctrl->vs >= NVME_VS(1, 1) &&
 	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
@@ -1585,6 +1590,7 @@ static void nvme_scan_work(struct work_struct *work)
 	}
 	nvme_scan_ns_sequential(ctrl, nn);
  done:
+	mutex_lock(&ctrl->namespaces_mutex);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	mutex_unlock(&ctrl->namespaces_mutex);
 	kfree(id);
@@ -1604,6 +1610,11 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
+/*
+ * This function iterates the namespace list unlocked to allow recovery from
+ * controller failure. It is up to the caller to ensure the namespace list is
+ * not modified by scan work while this function is executing.
+ */
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns, *next;
@@ -1617,10 +1628,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	if (ctrl->state == NVME_CTRL_DEAD)
 		nvme_kill_queues(ctrl);
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
-	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
@@ -1791,11 +1800,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
-		if (!kref_get_unless_zero(&ns->kref))
-			continue;
-
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
 		 * end buffered writers dirtying pages that can't be synced.
@@ -1806,10 +1812,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		blk_set_queue_dying(ns->queue);
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_mq_start_stopped_hw_queues(ns->queue, true);
-
-		nvme_put_ns(ns);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
@@ -1817,8 +1821,8 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		spin_lock_irq(ns->queue->queue_lock);
 		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
 		spin_unlock_irq(ns->queue->queue_lock);
@@ -1826,7 +1830,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 		blk_mq_cancel_requeue_work(ns->queue);
 		blk_mq_stop_hw_queues(ns->queue);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
@@ -1834,13 +1838,13 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
 		blk_mq_start_stopped_hw_queues(ns->queue, true);
 		blk_mq_kick_requeue_list(ns->queue);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index 38a8bbe74810..83797d89c30f 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -195,7 +195,7 @@ static void parport_detach(struct parport *port)
 	struct pps_client_pp *device;
 
 	/* FIXME: oooh, this is ugly! */
-	if (strcmp(pardev->name, KBUILD_MODNAME))
+	if (!pardev || strcmp(pardev->name, KBUILD_MODNAME))
 		/* not our port */
 		return;
 
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 526bf23dcb49..6c7fe4778793 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -152,7 +152,6 @@ static const struct regulator_ops rpm_smps_ldo_ops_fixed = {
 	.enable = rpm_reg_enable,
 	.disable = rpm_reg_disable,
 	.is_enabled = rpm_reg_is_enabled,
-	.list_voltage = regulator_list_voltage_linear_range,
 
 	.get_voltage = rpm_reg_get_voltage,
 	.set_voltage = rpm_reg_set_voltage,
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index f973bfce5d08..1e93a37e27f0 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -366,34 +366,22 @@ static void to_utf8(struct vc_data *vc, uint c)
 
 static void do_compute_shiftstate(void)
 {
-	unsigned int i, j, k, sym, val;
+	unsigned int k, sym, val;
 
 	shift_state = 0;
 	memset(shift_down, 0, sizeof(shift_down));
 
-	for (i = 0; i < ARRAY_SIZE(key_down); i++) {
-
-		if (!key_down[i])
+	for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) {
+		sym = U(key_maps[0][k]);
+		if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
 			continue;
 
-		k = i * BITS_PER_LONG;
-
-		for (j = 0; j < BITS_PER_LONG; j++, k++) {
-
-			if (!test_bit(k, key_down))
-				continue;
+		val = KVAL(sym);
+		if (val == KVAL(K_CAPSSHIFT))
+			val = KVAL(K_SHIFT);
 
-			sym = U(key_maps[0][k]);
-			if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
-				continue;
-
-			val = KVAL(sym);
-			if (val == KVAL(K_CAPSSHIFT))
-				val = KVAL(K_SHIFT);
-
-			shift_down[val]++;
-			shift_state |= (1 << val);
-		}
+		shift_down[val]++;
+		shift_state |= BIT(val);
 	}
 }