From d6157e4f18173ad24441aa9ca04e7e9121a9b4c7 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 10 Aug 2015 16:41:45 +0200
Subject: ARM: at91: remove useless includes in platform_data/atmel.h

include/linux/platform_data/atmel.h has a lot of useless includes; remove
them.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 include/linux/platform_data/atmel.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index 527a85c61924..4d67a5e82c83 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -9,15 +9,7 @@
 
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
-#include <linux/device.h>
-#include <linux/i2c.h>
-#include <linux/leds.h>
-#include <linux/spi/spi.h>
-#include <linux/usb/atmel_usba_udc.h>
-#include <linux/atmel-mci.h>
-#include <sound/atmel-ac97c.h>
 #include <linux/serial.h>
-#include <linux/platform_data/macb.h>
 
 /*
  * at91: 6 USARTs and one DBGU port (SAM9260)
-- 
cgit v1.2.3


From 5cfc5220a63b1008e7198fb4f91c3ef763e46657 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Fri, 18 Sep 2015 10:14:41 +0300
Subject: ARM: pxa: Remove unused clock_enable field from struct
 pxa2xx_spi_master

The struct pxa2xx_spi_master clock_enable field has been unused since its
use was removed from the pxa2xx-spi driver years ago by commit 2f1a74e5a2de
("[ARM] pxa: make pxa2xx_spi driver use ssp_request()/ssp_free()").

Therefore remove it from the structure definition, the documentation and
the few affected board files.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
 Documentation/spi/pxa2xx       | 6 ------
 arch/arm/mach-pxa/hx4700.c     | 1 -
 arch/arm/mach-pxa/icontrol.c   | 2 --
 arch/arm/mach-pxa/z2.c         | 2 --
 include/linux/spi/pxa2xx_spi.h | 1 -
 5 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index 3352f97430e4..13a0b7fb192f 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -22,15 +22,10 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
 found in include/linux/spi/pxa2xx_spi.h:
 
 struct pxa2xx_spi_master {
-	u32 clock_enable;
 	u16 num_chipselect;
 	u8 enable_dma;
 };
 
-The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
-corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See
-the "PXA2xx Developer Manual" section "Clocks and Power Management".
-
 The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of
 slave device (chips) attached to this SPI master.
 
@@ -57,7 +52,6 @@ static struct resource pxa_spi_nssp_resources[] = {
 };
 
 static struct pxa2xx_spi_master pxa_nssp_master_info = {
-	.clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
 	.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
 	.enable_dma = 1, /* Enables NSSP DMA */
 };
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 5fb41ad6e3bc..1e0301a0dbbb 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -630,7 +630,6 @@ static struct spi_board_info tsc2046_board_info[] __initdata = {
 
 static struct pxa2xx_spi_master pxa_ssp2_master_info = {
 	.num_chipselect = 1,
-	.clock_enable   = CKEN_SSP2,
 	.enable_dma     = 1,
 };
 
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 9b0eb0252af6..a1869f9b6219 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -116,13 +116,11 @@ static struct spi_board_info mcp251x_board_info[] = {
 };
 
 static struct pxa2xx_spi_master pxa_ssp3_spi_master_info = {
-	.clock_enable   = CKEN_SSP3,
 	.num_chipselect = 2,
 	.enable_dma     = 1
 };
 
 static struct pxa2xx_spi_master pxa_ssp4_spi_master_info = {
-	.clock_enable   = CKEN_SSP4,
 	.num_chipselect = 2,
 	.enable_dma     = 1
 };
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index e1a121b36cfa..3deeca7bbe41 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -595,13 +595,11 @@ static struct spi_board_info spi_board_info[] __initdata = {
 };
 
 static struct pxa2xx_spi_master pxa_ssp1_master_info = {
-	.clock_enable	= CKEN_SSP,
 	.num_chipselect	= 1,
 	.enable_dma	= 1,
 };
 
 static struct pxa2xx_spi_master pxa_ssp2_master_info = {
-	.clock_enable	= CKEN_SSP2,
 	.num_chipselect	= 1,
 };
 
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 6d36dacec4ba..9ec4c147abbc 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -23,7 +23,6 @@ struct dma_chan;
 
 /* device.platform_data for SSP controller devices */
 struct pxa2xx_spi_master {
-	u32 clock_enable;
 	u16 num_chipselect;
 	u8 enable_dma;
 
-- 
cgit v1.2.3


From a645654b817feba05e5156345325d19fc85ebc9f Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 23 Aug 2015 15:18:55 +0200
Subject: vga_switcheroo: Document _ALL_ the things!

This adds an "Overview" DOC section plus two DOC sections for the modes
of use ("Manual switching and manual power control" and "Driver power
control").

Also included is kernel-doc for all public functions, structs and enums.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/vga/vga_switcheroo.c | 285 +++++++++++++++++++++++++++++++++++++--
 include/linux/vga_switcheroo.h   |  85 +++++++++++-
 2 files changed, 353 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 21060668fd25..b19a72f7ac7c 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -1,20 +1,31 @@
 /*
+ * vga_switcheroo.c - Support for laptop with dual GPU using one set of outputs
+ *
  * Copyright (c) 2010 Red Hat Inc.
  * Author : Dave Airlie <airlied@redhat.com>
  *
+ * Copyright (c) 2015 Lukas Wunner <lukas@wunner.de>
  *
- * Licensed under GPLv2
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
- * vga_switcheroo.c - Support for laptop with dual GPU using one set of outputs
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
  *
- * Switcher interface - methods require for ATPX and DCM
- * - switchto - this throws the output MUX switch
- * - discrete_set_power - sets the power state for the discrete card
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS
+ * IN THE SOFTWARE.
  *
- * GPU driver interface
- * - set_gpu_state - this should do the equiv of s/r for the card
- *                 - this should *not* set the discrete power state
- * - switch_check  - check if the device is in a position to switch now
  */
 
 #define pr_fmt(fmt) "vga_switcheroo: " fmt
@@ -33,6 +44,61 @@
 
 #include <linux/vgaarb.h>
 
+/**
+ * DOC: Overview
+ *
+ * vga_switcheroo is the Linux subsystem for laptop hybrid graphics.
+ * These come in two flavors:
+ *
+ * * muxed: Dual GPUs with a multiplexer chip to switch outputs between GPUs.
+ * * muxless: Dual GPUs but only one of them is connected to outputs.
+ * 	The other one is merely used to offload rendering, its results
+ * 	are copied over PCIe into the framebuffer. On Linux this is
+ * 	supported with DRI PRIME.
+ *
+ * Hybrid graphics started to appear in the late Naughties and were initially
+ * all muxed. Newer laptops moved to a muxless architecture for cost reasons.
+ * A notable exception is the MacBook Pro which continues to use a mux.
+ * Muxes come with varying capabilities: Some switch only the panel, others
+ * can also switch external displays. Some switch all display pins at once
+ * while others can switch just the DDC lines. (To allow EDID probing
+ * for the inactive GPU.) Also, muxes are often used to cut power to the
+ * discrete GPU while it is not used.
+ *
+ * DRM drivers register GPUs with vga_switcheroo, these are henceforth called
+ * clients. The mux is called the handler. Muxless machines also register a
+ * handler to control the power state of the discrete GPU, its ->switchto
+ * callback is a no-op for obvious reasons. The discrete GPU is often equipped
+ * with an HDA controller for the HDMI/DP audio signal, this will also
+ * register as a client so that vga_switcheroo can take care of the correct
+ * suspend/resume order when changing the discrete GPU's power state. In total
+ * there can thus be up to three clients: Two vga clients (GPUs) and one audio
+ * client (on the discrete GPU). The code is mostly prepared to support
+ * machines with more than two GPUs should they become available.
+ * The GPU to which the outputs are currently switched is called the
+ * active client in vga_switcheroo parlance. The GPU not in use is the
+ * inactive client.
+ */
+
+/**
+ * struct vga_switcheroo_client - registered client
+ * @pdev: client pci device
+ * @fb_info: framebuffer to which console is remapped on switching
+ * @pwr_state: current power state
+ * @ops: client callbacks
+ * @id: client identifier, see enum vga_switcheroo_client_id.
+ * 	Determining the id requires the handler, so GPUs are initially
+ * 	assigned -1 and later given their true id in vga_switcheroo_enable()
+ * @active: whether the outputs are currently switched to this client
+ * @driver_power_control: whether power state is controlled by the driver's
+ * 	runtime pm. If true, writing ON and OFF to the vga_switcheroo debugfs
+ * 	interface is a no-op so as not to interfere with runtime pm
+ * @list: client list
+ *
+ * Registered client. A client can be either a GPU or an audio device on a GPU.
+ * For audio clients, the @fb_info, @active and @driver_power_control members
+ * are bogus.
+ */
 struct vga_switcheroo_client {
 	struct pci_dev *pdev;
 	struct fb_info *fb_info;
@@ -44,10 +110,28 @@ struct vga_switcheroo_client {
 	struct list_head list;
 };
 
+/*
+ * protects access to struct vgasr_priv
+ */
 static DEFINE_MUTEX(vgasr_mutex);
 
+/**
+ * struct vgasr_priv - vga_switcheroo private data
+ * @active: whether vga_switcheroo is enabled.
+ * 	Prerequisite is the registration of two GPUs and a handler
+ * @delayed_switch_active: whether a delayed switch is pending
+ * @delayed_client_id: client to which a delayed switch is pending
+ * @debugfs_root: directory for vga_switcheroo debugfs interface
+ * @switch_file: file for vga_switcheroo debugfs interface
+ * @registered_clients: number of registered GPUs
+ * 	(counting only vga clients, not audio clients)
+ * @clients: list of registered clients
+ * @handler: registered handler
+ *
+ * vga_switcheroo private data. Currently only one vga_switcheroo instance
+ * per system is supported.
+ */
 struct vgasr_priv {
-
 	bool active;
 	bool delayed_switch_active;
 	enum vga_switcheroo_client_id delayed_client_id;
@@ -103,6 +187,15 @@ static void vga_switcheroo_enable(void)
 	vgasr_priv.active = true;
 }
 
+/**
+ * vga_switcheroo_register_handler() - register handler
+ * @handler: handler callbacks
+ *
+ * Register handler. Enable vga_switcheroo if two vga clients have already
+ * registered.
+ *
+ * Return: 0 on success, -EINVAL if a handler was already registered.
+ */
 int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler)
 {
 	mutex_lock(&vgasr_mutex);
@@ -121,6 +214,11 @@ int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler)
 }
 EXPORT_SYMBOL(vga_switcheroo_register_handler);
 
+/**
+ * vga_switcheroo_unregister_handler() - unregister handler
+ *
+ * Unregister handler. Disable vga_switcheroo.
+ */
 void vga_switcheroo_unregister_handler(void)
 {
 	mutex_lock(&vgasr_mutex);
@@ -164,6 +262,19 @@ static int register_client(struct pci_dev *pdev,
 	return 0;
 }
 
+/**
+ * vga_switcheroo_register_client - register vga client
+ * @pdev: client pci device
+ * @ops: client callbacks
+ * @driver_power_control: whether power state is controlled by the driver's
+ * 	runtime pm
+ *
+ * Register vga client (GPU). Enable vga_switcheroo if another GPU and a
+ * handler have already registered. The power state of the client is assumed
+ * to be ON.
+ *
+ * Return: 0 on success, -ENOMEM on memory allocation error.
+ */
 int vga_switcheroo_register_client(struct pci_dev *pdev,
 				   const struct vga_switcheroo_client_ops *ops,
 				   bool driver_power_control)
@@ -174,6 +285,18 @@ int vga_switcheroo_register_client(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL(vga_switcheroo_register_client);
 
+/**
+ * vga_switcheroo_register_audio_client - register audio client
+ * @pdev: client pci device
+ * @ops: client callbacks
+ * @id: client identifier, see enum vga_switcheroo_client_id
+ * @active: whether the audio device is fully initialized
+ *
+ * Register audio client (audio device on a GPU). The power state of the
+ * client is assumed to be ON.
+ *
+ * Return: 0 on success, -ENOMEM on memory allocation error.
+ */
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
 					 int id, bool active)
@@ -215,6 +338,15 @@ find_active_client(struct list_head *head)
 	return NULL;
 }
 
+/**
+ * vga_switcheroo_get_client_state() - obtain power state of a given client
+ * @pdev: client pci device
+ *
+ * Obtain power state of a given client as seen from vga_switcheroo.
+ * The function is only called from hda_intel.c.
+ *
+ * Return: Power state.
+ */
 int vga_switcheroo_get_client_state(struct pci_dev *pdev)
 {
 	struct vga_switcheroo_client *client;
@@ -228,6 +360,12 @@ int vga_switcheroo_get_client_state(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(vga_switcheroo_get_client_state);
 
+/**
+ * vga_switcheroo_unregister_client() - unregister client
+ * @pdev: client pci device
+ *
+ * Unregister client. Disable vga_switcheroo if this is a vga client (GPU).
+ */
 void vga_switcheroo_unregister_client(struct pci_dev *pdev)
 {
 	struct vga_switcheroo_client *client;
@@ -249,6 +387,14 @@ void vga_switcheroo_unregister_client(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(vga_switcheroo_unregister_client);
 
+/**
+ * vga_switcheroo_client_fb_set() - set framebuffer of a given client
+ * @pdev: client pci device
+ * @info: framebuffer
+ *
+ * Set framebuffer of a given client. The console will be remapped to this
+ * on switching.
+ */
 void vga_switcheroo_client_fb_set(struct pci_dev *pdev,
 				 struct fb_info *info)
 {
@@ -262,6 +408,42 @@ void vga_switcheroo_client_fb_set(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL(vga_switcheroo_client_fb_set);
 
+/**
+ * DOC: Manual switching and manual power control
+ *
+ * In this mode of use, the file /sys/kernel/debug/vgaswitcheroo/switch
+ * can be read to retrieve the current vga_switcheroo state and commands
+ * can be written to it to change the state. The file appears as soon as
+ * two GPU drivers and one handler have registered with vga_switcheroo.
+ * The following commands are understood:
+ *
+ * * OFF: Power off the device not in use.
+ * * ON: Power on the device not in use.
+ * * IGD: Switch to the integrated graphics device.
+ * 	Power on the integrated GPU if necessary, power off the discrete GPU.
+ * 	Prerequisite is that no user space processes (e.g. Xorg, alsactl)
+ * 	have opened device files of the GPUs or the audio client. If the
+ * 	switch fails, the user may invoke lsof(8) or fuser(1) on /dev/dri/
+ * 	and /dev/snd/controlC1 to identify processes blocking the switch.
+ * * DIS: Switch to the discrete graphics device.
+ * * DIGD: Delayed switch to the integrated graphics device.
+ * 	This will perform the switch once the last user space process has
+ * 	closed the device files of the GPUs and the audio client.
+ * * DDIS: Delayed switch to the discrete graphics device.
+ * * MIGD: Mux-only switch to the integrated graphics device.
+ * 	Does not remap console or change the power state of either gpu.
+ * 	If the integrated GPU is currently off, the screen will turn black.
+ * 	If it is on, the screen will show whatever happens to be in VRAM.
+ * 	Either way, the user has to blindly enter the command to switch back.
+ * * MDIS: Mux-only switch to the discrete graphics device.
+ *
+ * For GPUs whose power state is controlled by the driver's runtime pm,
+ * the ON and OFF commands are a no-op (see next section).
+ *
+ * For muxless machines, the IGD/DIS, DIGD/DDIS and MIGD/MDIS commands
+ * should not be used.
+ */
+
 static int vga_switcheroo_show(struct seq_file *m, void *v)
 {
 	struct vga_switcheroo_client *client;
@@ -559,6 +741,16 @@ fail:
 	return -1;
 }
 
+/**
+ * vga_switcheroo_process_delayed_switch() - helper for delayed switching
+ *
+ * Process a delayed switch if one is pending. DRM drivers should call this
+ * from their ->lastclose callback.
+ *
+ * Return: 0 on success. -EINVAL if no delayed switch is pending, if the client
+ * has unregistered in the meantime or if there are other clients blocking the
+ * switch. If the actual switch fails, an error is reported and 0 is returned.
+ */
 int vga_switcheroo_process_delayed_switch(void)
 {
 	struct vga_switcheroo_client *client;
@@ -589,6 +781,39 @@ err:
 }
 EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch);
 
+/**
+ * DOC: Driver power control
+ *
+ * In this mode of use, the discrete GPU automatically powers up and down at
+ * the discretion of the driver's runtime pm. On muxed machines, the user may
+ * still influence the muxer state by way of the debugfs interface, however
+ * the ON and OFF commands become a no-op for the discrete GPU.
+ *
+ * This mode is the default on Nvidia HybridPower/Optimus and ATI PowerXpress.
+ * Specifying nouveau.runpm=0, radeon.runpm=0 or amdgpu.runpm=0 on the kernel
+ * command line disables it.
+ *
+ * When the driver decides to power up or down, it notifies vga_switcheroo
+ * thereof so that it can (a) power the audio device on the GPU up or down,
+ * and (b) update its internal power state representation for the device.
+ * This is achieved by vga_switcheroo_set_dynamic_switch().
+ *
+ * After the GPU has been suspended, the handler needs to be called to cut
+ * power to the GPU. Likewise it needs to reinstate power before the GPU
+ * can resume. This is achieved by vga_switcheroo_init_domain_pm_ops(),
+ * which augments the GPU's suspend/resume functions by the requisite
+ * calls to the handler.
+ *
+ * When the audio device resumes, the GPU needs to be woken. This is achieved
+ * by vga_switcheroo_init_domain_pm_optimus_hdmi_audio(), which augments the
+ * audio device's resume function.
+ *
+ * On muxed machines, if the mux is initially switched to the discrete GPU,
+ * the user ends up with a black screen when the GPU powers down after boot.
+ * As a workaround, the mux is forced to the integrated GPU on runtime suspend,
+ * cf. https://bugs.freedesktop.org/show_bug.cgi?id=75917
+ */
+
 static void vga_switcheroo_power_switch(struct pci_dev *pdev,
 					enum vga_switcheroo_state state)
 {
@@ -607,8 +832,17 @@ static void vga_switcheroo_power_switch(struct pci_dev *pdev,
 	vgasr_priv.handler->power_state(client->id, state);
 }
 
-/* force a PCI device to a certain state - mainly to turn off audio clients */
-
+/**
+ * vga_switcheroo_set_dynamic_switch() - helper for driver power control
+ * @pdev: client pci device
+ * @dynamic: new power state
+ *
+ * Helper for GPUs whose power state is controlled by the driver's runtime pm.
+ * When the driver decides to power up or down, it notifies vga_switcheroo
+ * thereof using this helper so that it can (a) power the audio device on
+ * the GPU up or down, and (b) update its internal power state representation
+ * for the device.
+ */
 void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev,
 				       enum vga_switcheroo_state dynamic)
 {
@@ -654,8 +888,18 @@ static int vga_switcheroo_runtime_resume(struct device *dev)
 	return 0;
 }
 
-/* this version is for the case where the power switch is separate
-   to the device being powered down. */
+/**
+ * vga_switcheroo_init_domain_pm_ops() - helper for driver power control
+ * @dev: vga client device
+ * @domain: power domain
+ *
+ * Helper for GPUs whose power state is controlled by the driver's runtime pm.
+ * After the GPU has been suspended, the handler needs to be called to cut
+ * power to the GPU. Likewise it needs to reinstate power before the GPU
+ * can resume. To this end, this helper augments the suspend/resume functions
+ * by the requisite calls to the handler. It needs only be called on platforms
+ * where the power switch is separate to the device being powered down.
+ */
 int vga_switcheroo_init_domain_pm_ops(struct device *dev,
 				      struct dev_pm_domain *domain)
 {
@@ -709,6 +953,19 @@ static int vga_switcheroo_runtime_resume_hdmi_audio(struct device *dev)
 	return ret;
 }
 
+/**
+ * vga_switcheroo_init_domain_pm_optimus_hdmi_audio() - helper for driver
+ * 	power control
+ * @dev: audio client device
+ * @domain: power domain
+ *
+ * Helper for GPUs whose power state is controlled by the driver's runtime pm.
+ * When the audio device resumes, the GPU needs to be woken. This helper
+ * augments the audio device's resume function to do that.
+ *
+ * Return: 0 on success, -EINVAL if no power management operations are
+ * defined for this device.
+ */
 int
 vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev,
 						 struct dev_pm_domain *domain)
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b483abd34493..fe90bfc3b510 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -1,10 +1,31 @@
 /*
+ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
+ *
  * Copyright (c) 2010 Red Hat Inc.
  * Author : Dave Airlie <airlied@redhat.com>
  *
- * Licensed under GPLv2
+ * Copyright (c) 2015 Lukas Wunner <lukas@wunner.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS
+ * IN THE SOFTWARE.
  *
- * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
  */
 
 #ifndef _LINUX_VGA_SWITCHEROO_H_
@@ -14,6 +35,20 @@
 
 struct pci_dev;
 
+/**
+ * enum vga_switcheroo_state - client power state
+ * @VGA_SWITCHEROO_OFF: off
+ * @VGA_SWITCHEROO_ON: on
+ * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but
+ * 	vga_switcheroo is not enabled, i.e. no second client or no handler
+ * 	has registered. Only used in vga_switcheroo_get_client_state() which
+ * 	in turn is only called from hda_intel.c
+ * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo.
+ * 	Only used in vga_switcheroo_get_client_state() which in turn is only
+ * 	called from hda_intel.c
+ *
+ * Client power state.
+ */
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
@@ -22,20 +57,64 @@ enum vga_switcheroo_state {
 	VGA_SWITCHEROO_NOT_FOUND,
 };
 
+/**
+ * enum vga_switcheroo_client_id - client identifier
+ * @VGA_SWITCHEROO_IGD: integrated graphics device
+ * @VGA_SWITCHEROO_DIS: discrete graphics device
+ * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported
+ *
+ * Client identifier. Audio clients use the same identifier ORed with 0x100.
+ */
 enum vga_switcheroo_client_id {
 	VGA_SWITCHEROO_IGD,
 	VGA_SWITCHEROO_DIS,
 	VGA_SWITCHEROO_MAX_CLIENTS,
 };
 
+/**
+ * struct vga_switcheroo_handler - handler callbacks
+ * @init: initialize handler.
+ * 	Optional. This gets called when vga_switcheroo is enabled, i.e. when
+ * 	two vga clients have registered. It allows the handler to perform
+ * 	some delayed initialization that depends on the existence of the
+ * 	vga clients. Currently only the radeon and amdgpu drivers use this.
+ * 	The return value is ignored
+ * @switchto: switch outputs to given client.
+ * 	Mandatory. For muxless machines this should be a no-op. Returning 0
+ * 	denotes success, anything else failure (in which case the switch is
+ * 	aborted)
+ * @power_state: cut or reinstate power of given client.
+ * 	Optional. The return value is ignored
+ * @get_client_id: determine if given pci device is integrated or discrete GPU.
+ * 	Mandatory
+ *
+ * Handler callbacks. The multiplexer itself. The @switchto and @get_client_id
+ * methods are mandatory, all others may be set to NULL.
+ */
 struct vga_switcheroo_handler {
+	int (*init)(void);
 	int (*switchto)(enum vga_switcheroo_client_id id);
 	int (*power_state)(enum vga_switcheroo_client_id id,
 			   enum vga_switcheroo_state state);
-	int (*init)(void);
 	int (*get_client_id)(struct pci_dev *pdev);
 };
 
+/**
+ * struct vga_switcheroo_client_ops - client callbacks
+ * @set_gpu_state: do the equivalent of suspend/resume for the card.
+ * 	Mandatory. This should not cut power to the discrete GPU,
+ * 	which is the job of the handler
+ * @reprobe: poll outputs.
+ * 	Optional. This gets called after waking the GPU and switching
+ * 	the outputs to it
+ * @can_switch: check if the device is in a position to switch now.
+ * 	Mandatory. The client should return false if a user space process
+ * 	has one of its device files open
+ *
+ * Client callbacks. A client can be either a GPU or an audio device on a GPU.
+ * The @set_gpu_state and @can_switch methods are mandatory, @reprobe may be
+ * set to NULL. For audio clients, the @reprobe member is bogus.
+ */
 struct vga_switcheroo_client_ops {
 	void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state);
 	void (*reprobe)(struct pci_dev *dev);
-- 
cgit v1.2.3


From 7dc87ff8815ef43717c936faea79013855e3dbef Mon Sep 17 00:00:00 2001
From: Fugang Duan <b38611@freescale.com>
Date: Mon, 7 Sep 2015 10:54:59 +0800
Subject: ARM: imx7d: add imx7d iomux-gpr field define

Add the imx7d iomux-gpr register offset and field definitions.

Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 include/linux/mfd/syscon/imx7-iomuxc-gpr.h | 47 ++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 include/linux/mfd/syscon/imx7-iomuxc-gpr.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h
new file mode 100644
index 000000000000..4585d6105d68
--- /dev/null
+++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_IMX7_IOMUXC_GPR_H
+#define __LINUX_IMX7_IOMUXC_GPR_H
+
+#define IOMUXC_GPR0	0x00
+#define IOMUXC_GPR1	0x04
+#define IOMUXC_GPR2	0x08
+#define IOMUXC_GPR3	0x0c
+#define IOMUXC_GPR4	0x10
+#define IOMUXC_GPR5	0x14
+#define IOMUXC_GPR6	0x18
+#define IOMUXC_GPR7	0x1c
+#define IOMUXC_GPR8	0x20
+#define IOMUXC_GPR9	0x24
+#define IOMUXC_GPR10	0x28
+#define IOMUXC_GPR11	0x2c
+#define IOMUXC_GPR12	0x30
+#define IOMUXC_GPR13	0x34
+#define IOMUXC_GPR14	0x38
+#define IOMUXC_GPR15	0x3c
+#define IOMUXC_GPR16	0x40
+#define IOMUXC_GPR17	0x44
+#define IOMUXC_GPR18	0x48
+#define IOMUXC_GPR19	0x4c
+#define IOMUXC_GPR20	0x50
+#define IOMUXC_GPR21	0x54
+#define IOMUXC_GPR22	0x58
+
+/* imx7d iomux gpr register field definitions */
+#define IMX7D_GPR1_IRQ_MASK			(0x1 << 12)
+#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK	(0x1 << 13)
+#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASK	(0x1 << 14)
+#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK		(0x3 << 13)
+#define IMX7D_GPR1_ENET1_CLK_DIR_MASK		(0x1 << 17)
+#define IMX7D_GPR1_ENET2_CLK_DIR_MASK		(0x1 << 18)
+#define IMX7D_GPR1_ENET_CLK_DIR_MASK		(0x3 << 17)
+
+#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI		(0x1 << 4)
+
+#endif /* __LINUX_IMX7_IOMUXC_GPR_H */
-- 
cgit v1.2.3


From f15a66e68422ca6bb783142780ad440067f6cc89 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 5 Sep 2015 11:22:39 +0200
Subject: drm: Spell vga_switcheroo consistently

Currently everyone and their dog has their own favourite spelling
for vga_switcheroo. This makes it hard to grep dmesg for log entries
relating to vga_switcheroo. It also makes it hard to find related
source files in the tree.

vga_switcheroo.c uses pr_fmt "vga_switcheroo". Use that everywhere.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/DocBook/drm.tmpl     | 2 +-
 drivers/gpu/drm/omapdrm/omap_drv.c | 2 +-
 include/linux/fb.h                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 9ddf8c6cb887..30401f927156 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -3646,7 +3646,7 @@ void (*postclose) (struct drm_device *, struct drm_file *);</synopsis>
 	plane properties to default value, so that a subsequent open of the
 	device will not inherit state from the previous user. It can also be
 	used to execute delayed power switching state changes, e.g. in
-	conjunction with the vga-switcheroo infrastructure. Beyond that KMS
+	conjunction with the vga_switcheroo infrastructure. Beyond that KMS
 	drivers should not do any further cleanup. Only legacy UMS drivers might
 	need to clean up device state so that the vga console or an independent
 	fbdev driver could take over.
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index a5f9d8bf75ed..d685e23449ce 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -753,7 +753,7 @@ static void dev_lastclose(struct drm_device *dev)
 {
 	int i;
 
-	/* we don't support vga-switcheroo.. so just make sure the fbdev
+	/* we don't support vga_switcheroo.. so just make sure the fbdev
 	 * mode is active
 	 */
 	struct omap_drm_private *priv = dev->dev_private;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index bc9afa74ee11..be40dbaed11e 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -156,7 +156,7 @@ struct fb_cursor_user {
 #define FB_EVENT_GET_REQ                0x0D
 /*      Unbind from the console if possible */
 #define FB_EVENT_FB_UNBIND              0x0E
-/*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */
+/*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */
 #define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
 /*      A hardware display blank early change occured */
 #define FB_EARLY_EVENT_BLANK		0x10
-- 
cgit v1.2.3


From 21b45676b7c4b79334d8fe3c5a112af0517b66e9 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 27 Aug 2015 16:43:43 +0200
Subject: vga_switcheroo: Set active attribute to false for audio clients

The active attribute in struct vga_switcheroo_client denotes whether
the outputs are currently switched to this client. The attribute is
only meaningful for vga clients. It is never used for audio clients.

The function vga_switcheroo_register_audio_client() misuses this
attribute to store whether the audio device is fully initialized.
Most likely there was a misunderstanding about the meaning of
"active" when this was added.

Comment from Takashi's review:

"Not really.  The full initialization of audio was meant that the audio
is active indeed.  Admittedly, though, the active flag for each audio
client doesn't play any role because the audio always follows the gfx
state changes, and the value passed there doesn't reflect the actual
state due to the later change.  So, I agree with the removal of the
flag itself -- or let the audio active flag following the
corresponding gfx flag.  The latter will make the proc output more
consistent while the former is certainly more reduction of code."

Set the active attribute to false for audio clients. Remove the
active parameter from vga_switcheroo_register_audio_client() and
its sole caller, hda_intel.c:register_vga_switcheroo().

vga_switcheroo_register_audio_client() was introduced by 3e9e63dbd374
("vga_switcheroo: Add the support for audio clients"). Its use in
hda_intel.c was introduced by a82d51ed24bb ("ALSA: hda - Support
VGA-switcheroo").

v1.1: The changes above imply that in find_active_client() the call
to client_is_vga() is now superfluous. Drop it.

Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
[danvet: Add Takashi's clarification to the commit message.]
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/vga/vga_switcheroo.c | 7 +++----
 include/linux/vga_switcheroo.h   | 4 ++--
 sound/pci/hda/hda_intel.c        | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 67a57090175d..86c03b53e7bf 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -288,7 +288,6 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  * @pdev: client pci device
  * @ops: client callbacks
  * @id: client identifier, see enum vga_switcheroo_client_id
- * @active: whether the audio device is fully initialized
  *
  * Register audio client (audio device on a GPU). The power state of the
  * client is assumed to be ON.
@@ -297,9 +296,9 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  */
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 int id, bool active)
+					 int id)
 {
-	return register_client(pdev, ops, id | ID_BIT_AUDIO, active, false);
+	return register_client(pdev, ops, id | ID_BIT_AUDIO, false, false);
 }
 EXPORT_SYMBOL(vga_switcheroo_register_audio_client);
 
@@ -331,7 +330,7 @@ find_active_client(struct list_head *head)
 	struct vga_switcheroo_client *client;
 
 	list_for_each_entry(client, head, list)
-		if (client->active && client_is_vga(client))
+		if (client->active)
 			return client;
 	return NULL;
 }
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index fe90bfc3b510..376499197717 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -128,7 +128,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev,
 				   bool driver_power_control);
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 int id, bool active);
+					 int id);
 
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
 				  struct fb_info *info);
@@ -154,7 +154,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i
 static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	const struct vga_switcheroo_client_ops *ops,
-	int id, bool active) { return 0; }
+	int id) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
 static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index c38c68f57938..e819013959d9 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1143,8 +1143,7 @@ static int register_vga_switcheroo(struct azx *chip)
 	 * is there any machine with two switchable HDMI audio controllers?
 	 */
 	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops,
-						    VGA_SWITCHEROO_DIS,
-						    hda->probe_continued);
+						   VGA_SWITCHEROO_DIS);
 	if (err < 0)
 		return err;
 	hda->vga_switcheroo_registered = 1;
-- 
cgit v1.2.3


From 8cb7cf56c9fe5412de238465b27ef35b4d2801aa Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 30 Mar 2015 10:59:52 +0100
Subject: firmware: add support for ARM System Control and Power
 Interface(SCPI) protocol

This patch adds support for System Control and Power Interface (SCPI)
Message Protocol used between the Application Cores(AP) and the System
Control Processor(SCP). The MHU peripheral provides a mechanism for
inter-processor communication between SCP's M3 processor and AP.

SCP offers control and management of the core/cluster power states,
various power domain DVFS including the core/cluster, certain system
clocks configuration, thermal sensors and many others.

This protocol driver provides interface for all the client drivers using
SCPI to make use of the features offered by the SCP.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Jon Medhurst (Tixy) <tixy@linaro.org>
Cc: Jassi Brar <jassisinghbrar@gmail.com>
Cc: Liviu Dudau <Liviu.Dudau@arm.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 MAINTAINERS                   |   2 +
 drivers/firmware/Kconfig      |  19 ++
 drivers/firmware/Makefile     |   1 +
 drivers/firmware/arm_scpi.c   | 711 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/scpi_protocol.h |  61 ++++
 5 files changed, 794 insertions(+)
 create mode 100644 drivers/firmware/arm_scpi.c
 create mode 100644 include/linux/scpi_protocol.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index be7d5c14729b..9598821d6a37 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9154,6 +9154,8 @@ M:	Sudeep Holla <sudeep.holla@arm.com>
 L:	linux-arm-kernel@lists.infradead.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/arm/arm,scpi.txt
+F:	drivers/firmware/arm_scpi.c
+F:	include/linux/scpi_protocol.h
 
 SCSI CDROM DRIVER
 M:	Jens Axboe <axboe@kernel.dk>
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index d8de6a8dd4de..800b7439606e 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -8,6 +8,25 @@ menu "Firmware Drivers"
 config ARM_PSCI_FW
 	bool
 
+config ARM_SCPI_PROTOCOL
+	tristate "ARM System Control and Power Interface (SCPI) Message Protocol"
+	depends on ARM_MHU
+	help
+	  System Control and Power Interface (SCPI) Message Protocol is
+	  defined for the purpose of communication between the Application
+	  Cores(AP) and the System Control Processor(SCP). The MHU peripheral
+	  provides a mechanism for inter-processor communication between SCP
+	  and AP.
+
+	  SCP controls most of the power management on the Application
+	  Processors. It offers control and management of: the core/cluster
+	  power states, various power domain DVFS including the core/cluster,
+	  certain system clocks configuration, thermal sensors and many
+	  others.
+
+	  This protocol library provides interface for all the client drivers
+	  making use of the features offered by the SCP.
+
 config EDD
 	tristate "BIOS Enhanced Disk Drive calls determine boot disk"
 	depends on X86
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 000830fc6707..e4c3a3fa1580 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the linux kernel.
 #
 obj-$(CONFIG_ARM_PSCI_FW)	+= psci.o
+obj-$(CONFIG_ARM_SCPI_PROTOCOL)	+= arm_scpi.o
 obj-$(CONFIG_DMI)		+= dmi_scan.o
 obj-$(CONFIG_DMI_SYSFS)		+= dmi-sysfs.o
 obj-$(CONFIG_EDD)		+= edd.o
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
new file mode 100644
index 000000000000..cb75c750ca54
--- /dev/null
+++ b/drivers/firmware/arm_scpi.c
@@ -0,0 +1,711 @@
+/*
+ * System Control and Power Interface (SCPI) Message Protocol driver
+ *
+ * SCPI Message Protocol is used between the System Control Processor(SCP)
+ * and the Application Processors(AP). The Message Handling Unit(MHU)
+ * provides a mechanism for inter-processor communication between SCP's
+ * Cortex M3 and AP.
+ *
+ * SCP offers control and management of the core/cluster power states,
+ * various power domain DVFS including the core/cluster, certain system
+ * clocks configuration, thermal sensors and many others.
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/printk.h>
+#include <linux/scpi_protocol.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/spinlock.h>
+
+#define CMD_ID_SHIFT		0
+#define CMD_ID_MASK		0x7f
+#define CMD_TOKEN_ID_SHIFT	8
+#define CMD_TOKEN_ID_MASK	0xff
+#define CMD_DATA_SIZE_SHIFT	16
+#define CMD_DATA_SIZE_MASK	0x1ff
+#define PACK_SCPI_CMD(cmd_id, tx_sz)			\
+	((((cmd_id) & CMD_ID_MASK) << CMD_ID_SHIFT) |	\
+	(((tx_sz) & CMD_DATA_SIZE_MASK) << CMD_DATA_SIZE_SHIFT))
+#define ADD_SCPI_TOKEN(cmd, token)			\
+	((cmd) |= (((token) & CMD_TOKEN_ID_MASK) << CMD_TOKEN_ID_SHIFT))
+
+#define CMD_SIZE(cmd)	(((cmd) >> CMD_DATA_SIZE_SHIFT) & CMD_DATA_SIZE_MASK)
+#define CMD_UNIQ_MASK	(CMD_TOKEN_ID_MASK << CMD_TOKEN_ID_SHIFT | CMD_ID_MASK)
+#define CMD_XTRACT_UNIQ(cmd)	((cmd) & CMD_UNIQ_MASK)
+
+#define SCPI_SLOT		0
+
+#define MAX_DVFS_DOMAINS	8
+#define MAX_DVFS_OPPS		8
+#define DVFS_LATENCY(hdr)	(le32_to_cpu(hdr) >> 16)
+#define DVFS_OPP_COUNT(hdr)	((le32_to_cpu(hdr) >> 8) & 0xff)
+
+#define PROTOCOL_REV_MINOR_BITS	16
+#define PROTOCOL_REV_MINOR_MASK	((1U << PROTOCOL_REV_MINOR_BITS) - 1)
+#define PROTOCOL_REV_MAJOR(x)	((x) >> PROTOCOL_REV_MINOR_BITS)
+#define PROTOCOL_REV_MINOR(x)	((x) & PROTOCOL_REV_MINOR_MASK)
+
+#define FW_REV_MAJOR_BITS	24
+#define FW_REV_MINOR_BITS	16
+#define FW_REV_PATCH_MASK	((1U << FW_REV_MINOR_BITS) - 1)
+#define FW_REV_MINOR_MASK	((1U << FW_REV_MAJOR_BITS) - 1)
+#define FW_REV_MAJOR(x)		((x) >> FW_REV_MAJOR_BITS)
+#define FW_REV_MINOR(x)		(((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS)
+#define FW_REV_PATCH(x)		((x) & FW_REV_PATCH_MASK)
+
+#define MAX_RX_TIMEOUT		(msecs_to_jiffies(20))
+
+enum scpi_error_codes {
+	SCPI_SUCCESS = 0, /* Success */
+	SCPI_ERR_PARAM = 1, /* Invalid parameter(s) */
+	SCPI_ERR_ALIGN = 2, /* Invalid alignment */
+	SCPI_ERR_SIZE = 3, /* Invalid size */
+	SCPI_ERR_HANDLER = 4, /* Invalid handler/callback */
+	SCPI_ERR_ACCESS = 5, /* Invalid access/permission denied */
+	SCPI_ERR_RANGE = 6, /* Value out of range */
+	SCPI_ERR_TIMEOUT = 7, /* Timeout has occurred */
+	SCPI_ERR_NOMEM = 8, /* Invalid memory area or pointer */
+	SCPI_ERR_PWRSTATE = 9, /* Invalid power state */
+	SCPI_ERR_SUPPORT = 10, /* Not supported or disabled */
+	SCPI_ERR_DEVICE = 11, /* Device error */
+	SCPI_ERR_BUSY = 12, /* Device busy */
+	SCPI_ERR_MAX
+};
+
+enum scpi_std_cmd {
+	SCPI_CMD_INVALID		= 0x00,
+	SCPI_CMD_SCPI_READY		= 0x01,
+	SCPI_CMD_SCPI_CAPABILITIES	= 0x02,
+	SCPI_CMD_SET_CSS_PWR_STATE	= 0x03,
+	SCPI_CMD_GET_CSS_PWR_STATE	= 0x04,
+	SCPI_CMD_SET_SYS_PWR_STATE	= 0x05,
+	SCPI_CMD_SET_CPU_TIMER		= 0x06,
+	SCPI_CMD_CANCEL_CPU_TIMER	= 0x07,
+	SCPI_CMD_DVFS_CAPABILITIES	= 0x08,
+	SCPI_CMD_GET_DVFS_INFO		= 0x09,
+	SCPI_CMD_SET_DVFS		= 0x0a,
+	SCPI_CMD_GET_DVFS		= 0x0b,
+	SCPI_CMD_GET_DVFS_STAT		= 0x0c,
+	SCPI_CMD_CLOCK_CAPABILITIES	= 0x0d,
+	SCPI_CMD_GET_CLOCK_INFO		= 0x0e,
+	SCPI_CMD_SET_CLOCK_VALUE	= 0x0f,
+	SCPI_CMD_GET_CLOCK_VALUE	= 0x10,
+	SCPI_CMD_PSU_CAPABILITIES	= 0x11,
+	SCPI_CMD_GET_PSU_INFO		= 0x12,
+	SCPI_CMD_SET_PSU		= 0x13,
+	SCPI_CMD_GET_PSU		= 0x14,
+	SCPI_CMD_SENSOR_CAPABILITIES	= 0x15,
+	SCPI_CMD_SENSOR_INFO		= 0x16,
+	SCPI_CMD_SENSOR_VALUE		= 0x17,
+	SCPI_CMD_SENSOR_CFG_PERIODIC	= 0x18,
+	SCPI_CMD_SENSOR_CFG_BOUNDS	= 0x19,
+	SCPI_CMD_SENSOR_ASYNC_VALUE	= 0x1a,
+	SCPI_CMD_SET_DEVICE_PWR_STATE	= 0x1b,
+	SCPI_CMD_GET_DEVICE_PWR_STATE	= 0x1c,
+	SCPI_CMD_COUNT
+};
+
+struct scpi_xfer {
+	u32 slot; /* has to be first element */
+	u32 cmd;
+	u32 status;
+	const void *tx_buf;
+	void *rx_buf;
+	unsigned int tx_len;
+	unsigned int rx_len;
+	struct list_head node;
+	struct completion done;
+};
+
+struct scpi_chan {
+	struct mbox_client cl;
+	struct mbox_chan *chan;
+	void __iomem *tx_payload;
+	void __iomem *rx_payload;
+	struct list_head rx_pending;
+	struct list_head xfers_list;
+	struct scpi_xfer *xfers;
+	spinlock_t rx_lock; /* locking for the rx pending list */
+	struct mutex xfers_lock;
+	u8 token;
+};
+
+struct scpi_drvinfo {
+	u32 protocol_version;
+	u32 firmware_version;
+	int num_chans;
+	atomic_t next_chan;
+	struct scpi_ops *scpi_ops;
+	struct scpi_chan *channels;
+	struct scpi_dvfs_info *dvfs[MAX_DVFS_DOMAINS];
+};
+
+/*
+ * The SCP firmware only executes in little-endian mode, so any buffers
+ * shared through SCPI should have their contents converted to little-endian
+ */
+struct scpi_shared_mem {
+	__le32 command;
+	__le32 status;
+	u8 payload[0];
+} __packed;
+
+struct scp_capabilities {
+	__le32 protocol_version;
+	__le32 event_version;
+	__le32 platform_version;
+	__le32 commands[4];
+} __packed;
+
+struct clk_get_info {
+	__le16 id;
+	__le16 flags;
+	__le32 min_rate;
+	__le32 max_rate;
+	u8 name[20];
+} __packed;
+
+struct clk_get_value {
+	__le32 rate;
+} __packed;
+
+struct clk_set_value {
+	__le16 id;
+	__le16 reserved;
+	__le32 rate;
+} __packed;
+
+struct dvfs_info {
+	__le32 header;
+	struct {
+		__le32 freq;
+		__le32 m_volt;
+	} opps[MAX_DVFS_OPPS];
+} __packed;
+
+struct dvfs_get {
+	u8 index;
+} __packed;
+
+struct dvfs_set {
+	u8 domain;
+	u8 index;
+} __packed;
+
+static struct scpi_drvinfo *scpi_info;
+
+static int scpi_linux_errmap[SCPI_ERR_MAX] = {
+	/* better than switch case as long as return value is continuous */
+	0, /* SCPI_SUCCESS */
+	-EINVAL, /* SCPI_ERR_PARAM */
+	-ENOEXEC, /* SCPI_ERR_ALIGN */
+	-EMSGSIZE, /* SCPI_ERR_SIZE */
+	-EINVAL, /* SCPI_ERR_HANDLER */
+	-EACCES, /* SCPI_ERR_ACCESS */
+	-ERANGE, /* SCPI_ERR_RANGE */
+	-ETIMEDOUT, /* SCPI_ERR_TIMEOUT */
+	-ENOMEM, /* SCPI_ERR_NOMEM */
+	-EINVAL, /* SCPI_ERR_PWRSTATE */
+	-EOPNOTSUPP, /* SCPI_ERR_SUPPORT */
+	-EIO, /* SCPI_ERR_DEVICE */
+	-EBUSY, /* SCPI_ERR_BUSY */
+};
+
+static inline int scpi_to_linux_errno(int errno)
+{
+	if (errno >= SCPI_SUCCESS && errno < SCPI_ERR_MAX)
+		return scpi_linux_errmap[errno];
+	return -EIO;
+}
+
+static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd)
+{
+	unsigned long flags;
+	struct scpi_xfer *t, *match = NULL;
+
+	spin_lock_irqsave(&ch->rx_lock, flags);
+	if (list_empty(&ch->rx_pending)) {
+		spin_unlock_irqrestore(&ch->rx_lock, flags);
+		return;
+	}
+
+	list_for_each_entry(t, &ch->rx_pending, node)
+		if (CMD_XTRACT_UNIQ(t->cmd) == CMD_XTRACT_UNIQ(cmd)) {
+			list_del(&t->node);
+			match = t;
+			break;
+		}
+	/* check if wait_for_completion is in progress or timed-out */
+	if (match && !completion_done(&match->done)) {
+		struct scpi_shared_mem *mem = ch->rx_payload;
+		unsigned int len = min(match->rx_len, CMD_SIZE(cmd));
+
+		match->status = le32_to_cpu(mem->status);
+		memcpy_fromio(match->rx_buf, mem->payload, len);
+		if (match->rx_len > len)
+			memset(match->rx_buf + len, 0, match->rx_len - len);
+		complete(&match->done);
+	}
+	spin_unlock_irqrestore(&ch->rx_lock, flags);
+}
+
+static void scpi_handle_remote_msg(struct mbox_client *c, void *msg)
+{
+	struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
+	struct scpi_shared_mem *mem = ch->rx_payload;
+	u32 cmd = le32_to_cpu(mem->command);
+
+	scpi_process_cmd(ch, cmd);
+}
+
+static void scpi_tx_prepare(struct mbox_client *c, void *msg)
+{
+	unsigned long flags;
+	struct scpi_xfer *t = msg;
+	struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
+	struct scpi_shared_mem *mem = (struct scpi_shared_mem *)ch->tx_payload;
+
+	if (t->tx_buf)
+		memcpy_toio(mem->payload, t->tx_buf, t->tx_len);
+	if (t->rx_buf) {
+		if (!(++ch->token))
+			++ch->token;
+		ADD_SCPI_TOKEN(t->cmd, ch->token);
+		spin_lock_irqsave(&ch->rx_lock, flags);
+		list_add_tail(&t->node, &ch->rx_pending);
+		spin_unlock_irqrestore(&ch->rx_lock, flags);
+	}
+	mem->command = cpu_to_le32(t->cmd);
+}
+
+static struct scpi_xfer *get_scpi_xfer(struct scpi_chan *ch)
+{
+	struct scpi_xfer *t;
+
+	mutex_lock(&ch->xfers_lock);
+	if (list_empty(&ch->xfers_list)) {
+		mutex_unlock(&ch->xfers_lock);
+		return NULL;
+	}
+	t = list_first_entry(&ch->xfers_list, struct scpi_xfer, node);
+	list_del(&t->node);
+	mutex_unlock(&ch->xfers_lock);
+	return t;
+}
+
+static void put_scpi_xfer(struct scpi_xfer *t, struct scpi_chan *ch)
+{
+	mutex_lock(&ch->xfers_lock);
+	list_add_tail(&t->node, &ch->xfers_list);
+	mutex_unlock(&ch->xfers_lock);
+}
+
+static int scpi_send_message(u8 cmd, void *tx_buf, unsigned int tx_len,
+			     void *rx_buf, unsigned int rx_len)
+{
+	int ret;
+	u8 chan;
+	struct scpi_xfer *msg;
+	struct scpi_chan *scpi_chan;
+
+	chan = atomic_inc_return(&scpi_info->next_chan) % scpi_info->num_chans;
+	scpi_chan = scpi_info->channels + chan;
+
+	msg = get_scpi_xfer(scpi_chan);
+	if (!msg)
+		return -ENOMEM;
+
+	msg->slot = BIT(SCPI_SLOT);
+	msg->cmd = PACK_SCPI_CMD(cmd, tx_len);
+	msg->tx_buf = tx_buf;
+	msg->tx_len = tx_len;
+	msg->rx_buf = rx_buf;
+	msg->rx_len = rx_len;
+	init_completion(&msg->done);
+
+	ret = mbox_send_message(scpi_chan->chan, msg);
+	if (ret < 0 || !rx_buf)
+		goto out;
+
+	if (!wait_for_completion_timeout(&msg->done, MAX_RX_TIMEOUT))
+		ret = -ETIMEDOUT;
+	else
+		/* already CPU-endian, converted in scpi_process_cmd() */
+		ret = msg->status;
+out:
+	if (ret < 0 && rx_buf) /* remove entry from the list if timed-out */
+		scpi_process_cmd(scpi_chan, msg->cmd);
+
+	put_scpi_xfer(msg, scpi_chan);
+	/* SCPI error codes > 0, translate them to Linux scale */
+	return ret > 0 ? scpi_to_linux_errno(ret) : ret;
+}
+
+static u32 scpi_get_version(void)
+{
+	return scpi_info->protocol_version;
+}
+
+static int
+scpi_clk_get_range(u16 clk_id, unsigned long *min, unsigned long *max)
+{
+	int ret;
+	struct clk_get_info clk;
+	__le16 le_clk_id = cpu_to_le16(clk_id);
+
+	ret = scpi_send_message(SCPI_CMD_GET_CLOCK_INFO, &le_clk_id,
+				sizeof(le_clk_id), &clk, sizeof(clk));
+	if (!ret) {
+		*min = le32_to_cpu(clk.min_rate);
+		*max = le32_to_cpu(clk.max_rate);
+	}
+	return ret;
+}
+
+static unsigned long scpi_clk_get_val(u16 clk_id)
+{
+	int ret;
+	struct clk_get_value clk;
+	__le16 le_clk_id = cpu_to_le16(clk_id);
+
+	ret = scpi_send_message(SCPI_CMD_GET_CLOCK_VALUE, &le_clk_id,
+				sizeof(le_clk_id), &clk, sizeof(clk));
+	return ret ? ret : le32_to_cpu(clk.rate);
+}
+
+static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
+{
+	int stat;
+	struct clk_set_value clk = {
+		.id = cpu_to_le16(clk_id),
+		.rate = cpu_to_le32(rate)
+	};
+
+	return scpi_send_message(SCPI_CMD_SET_CLOCK_VALUE, &clk, sizeof(clk),
+				 &stat, sizeof(stat));
+}
+
+static int scpi_dvfs_get_idx(u8 domain)
+{
+	int ret;
+	struct dvfs_get dvfs;
+
+	ret = scpi_send_message(SCPI_CMD_GET_DVFS, &domain, sizeof(domain),
+				&dvfs, sizeof(dvfs));
+	return ret ? ret : dvfs.index;
+}
+
+static int scpi_dvfs_set_idx(u8 domain, u8 index)
+{
+	int stat;
+	struct dvfs_set dvfs = {domain, index};
+
+	return scpi_send_message(SCPI_CMD_SET_DVFS, &dvfs, sizeof(dvfs),
+				 &stat, sizeof(stat));
+}
+
+static int opp_cmp_func(const void *opp1, const void *opp2)
+{
+	const struct scpi_opp *t1 = opp1, *t2 = opp2;
+
+	return (t1->freq > t2->freq) - (t1->freq < t2->freq); /* no u32 wrap */
+}
+
+static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain)
+{
+	struct scpi_dvfs_info *info;
+	struct scpi_opp *opp;
+	struct dvfs_info buf;
+	int ret, i;
+
+	if (domain >= MAX_DVFS_DOMAINS)
+		return ERR_PTR(-EINVAL);
+
+	if (scpi_info->dvfs[domain])	/* data already populated */
+		return scpi_info->dvfs[domain];
+
+	ret = scpi_send_message(SCPI_CMD_GET_DVFS_INFO, &domain, sizeof(domain),
+				&buf, sizeof(buf));
+	if (ret)
+		return ERR_PTR(ret);
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	/* header count is 8 bits wide; cap it to what buf.opps[] can hold */
+	info->count = min_t(u32, DVFS_OPP_COUNT(buf.header), MAX_DVFS_OPPS);
+	info->latency = DVFS_LATENCY(buf.header) * 1000; /* uS to nS */
+
+	info->opps = kcalloc(info->count, sizeof(*opp), GFP_KERNEL);
+	if (!info->opps) {
+		kfree(info);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, opp = info->opps; i < info->count; i++, opp++) {
+		opp->freq = le32_to_cpu(buf.opps[i].freq);
+		opp->m_volt = le32_to_cpu(buf.opps[i].m_volt);
+	}
+
+	sort(info->opps, info->count, sizeof(*opp), opp_cmp_func, NULL);
+
+	scpi_info->dvfs[domain] = info;
+	return info;
+}
+
+static struct scpi_ops scpi_ops = {
+	.get_version = scpi_get_version,
+	.clk_get_range = scpi_clk_get_range,
+	.clk_get_val = scpi_clk_get_val,
+	.clk_set_val = scpi_clk_set_val,
+	.dvfs_get_idx = scpi_dvfs_get_idx,
+	.dvfs_set_idx = scpi_dvfs_set_idx,
+	.dvfs_get_info = scpi_dvfs_get_info,
+};
+
+struct scpi_ops *get_scpi_ops(void)
+{
+	return scpi_info ? scpi_info->scpi_ops : NULL;
+}
+EXPORT_SYMBOL_GPL(get_scpi_ops);
+
+static int scpi_init_versions(struct scpi_drvinfo *info)
+{
+	int ret;
+	struct scp_capabilities caps;
+
+	ret = scpi_send_message(SCPI_CMD_SCPI_CAPABILITIES, NULL, 0,
+				&caps, sizeof(caps));
+	if (!ret) {
+		info->protocol_version = le32_to_cpu(caps.protocol_version);
+		info->firmware_version = le32_to_cpu(caps.platform_version);
+	}
+	return ret;
+}
+
+static ssize_t protocol_version_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d.%d\n",
+		       PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+		       PROTOCOL_REV_MINOR(scpi_info->protocol_version));
+}
+static DEVICE_ATTR_RO(protocol_version);
+
+static ssize_t firmware_version_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d.%d.%d\n",
+		       FW_REV_MAJOR(scpi_info->firmware_version),
+		       FW_REV_MINOR(scpi_info->firmware_version),
+		       FW_REV_PATCH(scpi_info->firmware_version));
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static struct attribute *versions_attrs[] = {
+	&dev_attr_firmware_version.attr,
+	&dev_attr_protocol_version.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(versions);
+
+static void
+scpi_free_channels(struct device *dev, struct scpi_chan *pchan, int count)
+{
+	int i;
+
+	for (i = 0; i < count && pchan->chan; i++, pchan++) {
+		mbox_free_channel(pchan->chan);
+		devm_kfree(dev, pchan->xfers);
+		devm_iounmap(dev, pchan->rx_payload);
+	}
+}
+
+static int scpi_remove(struct platform_device *pdev)
+{
+	int i;
+	struct device *dev = &pdev->dev;
+	struct scpi_drvinfo *info = platform_get_drvdata(pdev);
+
+	scpi_info = NULL; /* stop exporting SCPI ops through get_scpi_ops */
+
+	of_platform_depopulate(dev);
+	sysfs_remove_groups(&dev->kobj, versions_groups);
+	scpi_free_channels(dev, info->channels, info->num_chans);
+	platform_set_drvdata(pdev, NULL);
+
+	for (i = 0; i < MAX_DVFS_DOMAINS && info->dvfs[i]; i++) {
+		kfree(info->dvfs[i]->opps);
+		kfree(info->dvfs[i]);
+	}
+	devm_kfree(dev, info->channels);
+	devm_kfree(dev, info);
+
+	return 0;
+}
+
+#define MAX_SCPI_XFERS		10
+static int scpi_alloc_xfer_list(struct device *dev, struct scpi_chan *ch)
+{
+	int i;
+	struct scpi_xfer *xfers;
+
+	xfers = devm_kzalloc(dev, MAX_SCPI_XFERS * sizeof(*xfers), GFP_KERNEL);
+	if (!xfers)
+		return -ENOMEM;
+
+	ch->xfers = xfers;
+	for (i = 0; i < MAX_SCPI_XFERS; i++, xfers++)
+		list_add_tail(&xfers->node, &ch->xfers_list);
+	return 0;
+}
+
+static int scpi_probe(struct platform_device *pdev)
+{
+	int count, idx, ret;
+	struct resource res;
+	struct scpi_chan *scpi_chan;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+
+	scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
+	if (!scpi_info)
+		return -ENOMEM;
+
+	count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+	if (count < 0) {
+		dev_err(dev, "no mboxes property in '%s'\n", np->full_name);
+		return -ENODEV;
+	}
+
+	scpi_chan = devm_kcalloc(dev, count, sizeof(*scpi_chan), GFP_KERNEL);
+	if (!scpi_chan)
+		return -ENOMEM;
+
+	for (idx = 0; idx < count; idx++) {
+		resource_size_t size;
+		struct scpi_chan *pchan = scpi_chan + idx;
+		struct mbox_client *cl = &pchan->cl;
+		struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
+
+		ret = of_address_to_resource(shmem, 0, &res) ? -EINVAL : 0;
+		of_node_put(shmem); /* drop the of_parse_phandle() reference */
+		if (ret) {
+			dev_err(dev, "failed to get SCPI payload mem resource\n");
+			goto err;
+		}
+		size = resource_size(&res);
+		pchan->rx_payload = devm_ioremap(dev, res.start, size);
+		if (!pchan->rx_payload) {
+			dev_err(dev, "failed to ioremap SCPI payload\n");
+			ret = -EADDRNOTAVAIL;
+			goto err;
+		}
+		pchan->tx_payload = pchan->rx_payload + (size >> 1);
+
+		cl->dev = dev;
+		cl->rx_callback = scpi_handle_remote_msg;
+		cl->tx_prepare = scpi_tx_prepare;
+		cl->tx_block = true;
+		cl->tx_tout = 50;
+		cl->knows_txdone = false; /* controller can't ack */
+
+		INIT_LIST_HEAD(&pchan->rx_pending);
+		INIT_LIST_HEAD(&pchan->xfers_list);
+		spin_lock_init(&pchan->rx_lock);
+		mutex_init(&pchan->xfers_lock);
+
+		ret = scpi_alloc_xfer_list(dev, pchan);
+		if (!ret) {
+			pchan->chan = mbox_request_channel(cl, idx);
+			if (!IS_ERR(pchan->chan))
+				continue;
+			ret = PTR_ERR(pchan->chan);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "failed to get channel%d err %d\n",
+					idx, ret);
+		}
+err:
+		scpi_free_channels(dev, scpi_chan, idx);
+		scpi_info = NULL;
+		return ret;
+	}
+
+	scpi_info->channels = scpi_chan;
+	scpi_info->num_chans = count;
+	platform_set_drvdata(pdev, scpi_info);
+
+	ret = scpi_init_versions(scpi_info);
+	if (ret) {
+		dev_err(dev, "incorrect or no SCP firmware found\n");
+		scpi_remove(pdev);
+		return ret;
+	}
+
+	_dev_info(dev, "SCP Protocol %d.%d Firmware %d.%d.%d version\n",
+		  PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+		  PROTOCOL_REV_MINOR(scpi_info->protocol_version),
+		  FW_REV_MAJOR(scpi_info->firmware_version),
+		  FW_REV_MINOR(scpi_info->firmware_version),
+		  FW_REV_PATCH(scpi_info->firmware_version));
+	scpi_info->scpi_ops = &scpi_ops;
+
+	ret = sysfs_create_groups(&dev->kobj, versions_groups);
+	if (ret)
+		dev_err(dev, "unable to create sysfs version group\n");
+
+	return of_platform_populate(dev->of_node, NULL, NULL, dev);
+}
+
+static const struct of_device_id scpi_of_match[] = {
+	{.compatible = "arm,scpi"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, scpi_of_match);
+
+static struct platform_driver scpi_driver = {
+	.driver = {
+		.name = "scpi_protocol",
+		.of_match_table = scpi_of_match,
+	},
+	.probe = scpi_probe,
+	.remove = scpi_remove,
+};
+module_platform_driver(scpi_driver);
+
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("ARM SCPI mailbox protocol driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
new file mode 100644
index 000000000000..e7169cd54e19
--- /dev/null
+++ b/include/linux/scpi_protocol.h
@@ -0,0 +1,61 @@
+/*
+ * SCPI Message Protocol driver header
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+
+struct scpi_opp {
+	u32 freq;
+	u32 m_volt;
+} __packed;
+
+struct scpi_dvfs_info {
+	unsigned int count;
+	unsigned int latency; /* in nanoseconds */
+	struct scpi_opp *opps;
+};
+
+/**
+ * struct scpi_ops - represents the various operations provided
+ *	by SCP through SCPI message protocol
+ * @get_version: returns the major and minor revision on the SCPI
+ *	message protocol
+ * @clk_get_range: gets clock range limit(min - max in Hz)
+ * @clk_get_val: gets clock value(in Hz)
+ * @clk_set_val: sets the clock value, setting to 0 will disable the
+ *	clock (if supported)
+ * @dvfs_get_idx: gets the Operating Point of the given power domain.
+ *	OPP is an index to the list returned by @dvfs_get_info
+ * @dvfs_set_idx: sets the Operating Point of the given power domain.
+ *	OPP is an index to the list returned by @dvfs_get_info
+ * @dvfs_get_info: returns the DVFS capabilities of the given power
+ *	domain. It includes the OPP list and the latency information
+ */
+struct scpi_ops {
+	u32 (*get_version)(void);
+	int (*clk_get_range)(u16, unsigned long *, unsigned long *);
+	unsigned long (*clk_get_val)(u16);
+	int (*clk_set_val)(u16, unsigned long);
+	int (*dvfs_get_idx)(u8);
+	int (*dvfs_set_idx)(u8, u8);
+	struct scpi_dvfs_info *(*dvfs_get_info)(u8);
+};
+
+#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
+struct scpi_ops *get_scpi_ops(void);
+#else
+static inline struct scpi_ops *get_scpi_ops(void) { return NULL; }
+#endif
-- 
cgit v1.2.3


From 9290a16cf19301224556bc7bcb913c0c2a45bb9a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 21 Aug 2015 11:48:37 +0000
Subject: dmaengine: OF DMAEngine API based on CONFIG_DMA_OF instead of
 CONFIG_OF

5fa422c ("dmaengine: move drivers/of/dma.c -> drivers/dma/of-dma.c")
moved the OF-based DMAEngine code to of-dma.c, which is built under
CONFIG_DMA_OF. However, the OF-based DMAEngine API declared in of_dma.h
is still guarded by CONFIG_OF. As a result, the kernel cannot find the
OF-based DMAEngine API if .config has CONFIG_OF but not CONFIG_DMA_OF.
Guard the declarations with CONFIG_DMA_OF instead.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/of_dma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 98ba7525929e..36112cdd665a 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -34,7 +34,7 @@ struct of_dma_filter_info {
 	dma_filter_fn	filter_fn;
 };
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_DMA_OF
 extern int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
 		(struct of_phandle_args *, struct of_dma *),
-- 
cgit v1.2.3


From 068654c200cc32966ce7906ca0bd096b9b97e988 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 26 May 2015 16:49:01 +0100
Subject: drivers: firmware: psci: move power_state handling to generic code

Functions implemented on arm64 to check if a power_state parameter
is valid and if the power_state implies context loss are not
arm64 specific and should be moved to generic code so that they
can be reused on arm systems too.

This patch moves the functions handling the power_state parameter
to generic PSCI firmware layer code.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Jisheng Zhang <jszhang@marvell.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/kernel/psci.c | 14 --------------
 drivers/firmware/psci.c  | 15 +++++++++++++++
 include/linux/psci.h     |  2 ++
 3 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index aa94a88f6279..f67f35b6edb1 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -30,20 +30,6 @@
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 
-static bool psci_power_state_loses_context(u32 state)
-{
-	return state & PSCI_0_2_POWER_STATE_TYPE_MASK;
-}
-
-static bool psci_power_state_is_valid(u32 state)
-{
-	const u32 valid_mask = PSCI_0_2_POWER_STATE_ID_MASK |
-			       PSCI_0_2_POWER_STATE_TYPE_MASK |
-			       PSCI_0_2_POWER_STATE_AFFL_MASK;
-
-	return !(state & ~valid_mask);
-}
-
 static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
 
 static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu)
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index 0821e332c85a..3157eb0ef300 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -70,6 +70,21 @@ enum psci_function {
 
 static u32 psci_function_id[PSCI_FN_MAX];
 
+#define PSCI_0_2_POWER_STATE_MASK		\
+				(PSCI_0_2_POWER_STATE_ID_MASK | \
+				PSCI_0_2_POWER_STATE_TYPE_MASK | \
+				PSCI_0_2_POWER_STATE_AFFL_MASK)
+
+bool psci_power_state_loses_context(u32 state)
+{
+	return state & PSCI_0_2_POWER_STATE_TYPE_MASK;
+}
+
+bool psci_power_state_is_valid(u32 state)
+{
+	return !(state & ~PSCI_0_2_POWER_STATE_MASK);
+}
+
 static int psci_to_linux_errno(int errno)
 {
 	switch (errno) {
diff --git a/include/linux/psci.h b/include/linux/psci.h
index a682fcc91c33..12c4865457ad 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -21,6 +21,8 @@
 #define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
 
 bool psci_tos_resident_on(int cpu);
+bool psci_power_state_loses_context(u32 state);
+bool psci_power_state_is_valid(u32 state);
 
 struct psci_operations {
 	int (*cpu_suspend)(u32 state, unsigned long entry_point);
-- 
cgit v1.2.3


From 7d8d05d11473a169ab4d53bc7fc23d1fe3f1959f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Sun, 16 Aug 2015 11:23:46 +0200
Subject: misc: atmel_tclib: get and use slow clock

Commit dca1a4b5ff6e ("clk: at91: keep slow clk enabled to prevent system
hang") added a workaround for the slow clock as it is not properly handled
by its users.

Get and use the slow clock as it is necessary for the timer counters.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/clocksource/tcb_clksrc.c | 10 +++++++++-
 drivers/misc/atmel_tclib.c       |  4 ++++
 drivers/pwm/pwm-atmel-tcb.c      | 26 +++++++++++++++++++-------
 include/linux/atmel_tc.h         |  1 +
 4 files changed, 33 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index f8d11fcf80f1..6ee91401918e 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -193,10 +193,17 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
 	struct clk *t2_clk = tc->clk[2];
 	int irq = tc->irq[2];
 
+	ret = clk_prepare_enable(tc->slow_clk);
+	if (ret)
+		return ret;
+
 	/* try to enable t2 clk to avoid future errors in mode change */
 	ret = clk_prepare_enable(t2_clk);
-	if (ret)
+	if (ret) {
+		clk_disable_unprepare(tc->slow_clk);
 		return ret;
+	}
+
 	clk_disable(t2_clk);
 
 	clkevt.regs = tc->regs;
@@ -209,6 +216,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
 	ret = request_irq(irq, ch2_irq, IRQF_TIMER, "tc_clkevt", &clkevt);
 	if (ret) {
 		clk_unprepare(t2_clk);
+		clk_disable_unprepare(tc->slow_clk);
 		return ret;
 	}
 
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
index 0ca05c3ec8d6..ac24a4bd63f7 100644
--- a/drivers/misc/atmel_tclib.c
+++ b/drivers/misc/atmel_tclib.c
@@ -125,6 +125,10 @@ static int __init tc_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
+	if (IS_ERR(tc->slow_clk))
+		return PTR_ERR(tc->slow_clk);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tc->regs = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(tc->regs))
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index 6da01b3bf6f4..75db585a2a94 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -305,7 +305,7 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	 */
 	if (i == 5) {
 		i = slowclk;
-		rate = 32768;
+		rate = clk_get_rate(tc->slow_clk);
 		min = div_u64(NSEC_PER_SEC, rate);
 		max = min << tc->tcb_config->counter_width;
 
@@ -387,9 +387,9 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 
 	tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
 	if (tcbpwm == NULL) {
-		atmel_tc_free(tc);
+		err = -ENOMEM;
 		dev_err(&pdev->dev, "failed to allocate memory\n");
-		return -ENOMEM;
+		goto err_free_tc;
 	}
 
 	tcbpwm->chip.dev = &pdev->dev;
@@ -400,17 +400,27 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 	tcbpwm->chip.npwm = NPWM;
 	tcbpwm->tc = tc;
 
+	err = clk_prepare_enable(tc->slow_clk);
+	if (err)
+		goto err_free_tc;
+
 	spin_lock_init(&tcbpwm->lock);
 
 	err = pwmchip_add(&tcbpwm->chip);
-	if (err < 0) {
-		atmel_tc_free(tc);
-		return err;
-	}
+	if (err < 0)
+		goto err_disable_clk;
 
 	platform_set_drvdata(pdev, tcbpwm);
 
 	return 0;
+
+err_disable_clk:
+	clk_disable_unprepare(tcbpwm->tc->slow_clk);
+
+err_free_tc:
+	atmel_tc_free(tc);
+
+	return err;
 }
 
 static int atmel_tcb_pwm_remove(struct platform_device *pdev)
@@ -418,6 +428,8 @@ static int atmel_tcb_pwm_remove(struct platform_device *pdev)
 	struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
 	int err;
 
+	clk_disable_unprepare(tcbpwm->tc->slow_clk);
+
 	err = pwmchip_remove(&tcbpwm->chip);
 	if (err < 0)
 		return err;
diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h
index b87c1c7c242a..468fdfa643f0 100644
--- a/include/linux/atmel_tc.h
+++ b/include/linux/atmel_tc.h
@@ -67,6 +67,7 @@ struct atmel_tc {
 	const struct atmel_tcb_config *tcb_config;
 	int			irq[3];
 	struct clk		*clk[3];
+	struct clk		*slow_clk;
 	struct list_head	node;
 	bool			allocated;
 };
-- 
cgit v1.2.3


From 7f5028cf6190407b7a632b0f30b83187577824cc Mon Sep 17 00:00:00 2001
From: Emilio López <emilio.lopez@collabora.co.uk>
Date: Mon, 21 Sep 2015 10:38:20 -0300
Subject: sysfs: Support is_visible() on binary attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the sysfs header file:

    "The returned value will replace static permissions defined in
     struct attribute or struct bin_attribute."

but this isn't the case, as is_visible is called on struct attribute only.
This patch introduces a new is_bin_visible() function to implement
the same functionality for binary attributes, and updates documentation
accordingly.

Note that to keep functionality and code similar to that of normal
attributes, the mode is now checked as well to ensure it contains only
read/write permissions or SYSFS_PREALLOC.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Emilio López <emilio.lopez@collabora.co.uk>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 fs/sysfs/group.c      | 17 +++++++++++++++--
 include/linux/sysfs.h | 18 ++++++++++++++----
 2 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 39a019936768..51b56e6d9537 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -73,13 +73,26 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 	}
 
 	if (grp->bin_attrs) {
-		for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
+		for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) {
+			umode_t mode = (*bin_attr)->attr.mode;
+
 			if (update)
 				kernfs_remove_by_name(parent,
 						(*bin_attr)->attr.name);
+			if (grp->is_bin_visible) {
+				mode = grp->is_bin_visible(kobj, *bin_attr, i);
+				if (!mode)
+					continue;
+			}
+
+			WARN(mode & ~(SYSFS_PREALLOC | 0664),
+			     "Attribute %s: Invalid permissions 0%o\n",
+			     (*bin_attr)->attr.name, mode);
+
+			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent,
 					&(*bin_attr)->attr, true,
-					(*bin_attr)->attr.mode, NULL);
+					mode, NULL);
 			if (error)
 				break;
 		}
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 9f65758311a4..2f66050d073b 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -64,10 +64,18 @@ do {							\
  *		a new subdirectory with this name.
  * @is_visible:	Optional: Function to return permissions associated with an
  *		attribute of the group. Will be called repeatedly for each
- *		attribute in the group. Only read/write permissions as well as
- *		SYSFS_PREALLOC are accepted. Must return 0 if an attribute is
- *		not visible. The returned value will replace static permissions
- *		defined in struct attribute or struct bin_attribute.
+ *		non-binary attribute in the group. Only read/write
+ *		permissions as well as SYSFS_PREALLOC are accepted. Must
+ *		return 0 if an attribute is not visible. The returned value
+ *		will replace static permissions defined in struct attribute.
+ * @is_bin_visible:
+ *		Optional: Function to return permissions associated with a
+ *		binary attribute of the group. Will be called repeatedly
+ *		for each binary attribute in the group. Only read/write
+ *		permissions as well as SYSFS_PREALLOC are accepted. Must
+ *		return 0 if a binary attribute is not visible. The returned
+ *		value will replace static permissions defined in
+ *		struct bin_attribute.
  * @attrs:	Pointer to NULL terminated list of attributes.
  * @bin_attrs:	Pointer to NULL terminated list of binary attributes.
  *		Either attrs or bin_attrs or both must be provided.
@@ -76,6 +84,8 @@ struct attribute_group {
 	const char		*name;
 	umode_t			(*is_visible)(struct kobject *,
 					      struct attribute *, int);
+	umode_t			(*is_bin_visible)(struct kobject *,
+						  struct bin_attribute *, int);
 	struct attribute	**attrs;
 	struct bin_attribute	**bin_attrs;
 };
-- 
cgit v1.2.3


From 18800fc7a04e7df8a345e7ef4fc3064368276f83 Mon Sep 17 00:00:00 2001
From: Emilio López <emilio.lopez@collabora.co.uk>
Date: Mon, 21 Sep 2015 10:38:22 -0300
Subject: platform/chrome: Support reading/writing the vboot context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some EC implementations include a small nvram space used to store
verified boot context data. This patch offers a way to expose this
data to userspace.

Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Emilio López <emilio.lopez@collabora.co.uk>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/platform/chrome/Makefile      |   3 +-
 drivers/platform/chrome/cros_ec_dev.c |   1 +
 drivers/platform/chrome/cros_ec_vbc.c | 137 ++++++++++++++++++++++++++++++++++
 include/linux/mfd/cros_ec.h           |   1 +
 4 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/cros_ec_vbc.c

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 4a11b010f5d8..bc498bda8211 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -1,7 +1,8 @@
 
 obj-$(CONFIG_CHROMEOS_LAPTOP)	+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)	+= chromeos_pstore.o
-cros_ec_devs-objs               := cros_ec_dev.o cros_ec_sysfs.o cros_ec_lightbar.o
+cros_ec_devs-objs		:= cros_ec_dev.o cros_ec_sysfs.o \
+				   cros_ec_lightbar.o cros_ec_vbc.o
 obj-$(CONFIG_CROS_EC_CHARDEV)   += cros_ec_devs.o
 obj-$(CONFIG_CROS_EC_LPC)       += cros_ec_lpc.o
 obj-$(CONFIG_CROS_EC_PROTO)	+= cros_ec_proto.o
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 2f4099820480..d45cd254ed1c 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -32,6 +32,7 @@ static int ec_major;
 static const struct attribute_group *cros_ec_groups[] = {
 	&cros_ec_attr_group,
 	&cros_ec_lightbar_attr_group,
+	&cros_ec_vbc_attr_group,
 	NULL,
 };
 
diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c
new file mode 100644
index 000000000000..564a0d08c8bf
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_vbc.c
@@ -0,0 +1,137 @@
+/*
+ * cros_ec_vbc - Expose the vboot context nvram to userspace
+ *
+ * Copyright (C) 2015 Collabora Ltd.
+ *
+ * based on vendor driver,
+ *
+ * Copyright (C) 2012 The Chromium OS Authors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/slab.h>
+
+static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *att, char *buf,
+				  loff_t pos, size_t count)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+	struct cros_ec_device *ecdev = ec->ec_dev;
+	struct ec_params_vbnvcontext *params;
+	struct cros_ec_command *msg;
+	int err;
+	const size_t para_sz = sizeof(params->op);
+	const size_t resp_sz = sizeof(struct ec_response_vbnvcontext);
+	const size_t payload = max(para_sz, resp_sz);
+
+	msg = kmalloc(sizeof(*msg) + payload, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	/* NB: we only kmalloc()ated enough space for the op field */
+	params = (struct ec_params_vbnvcontext *)msg->data;
+	params->op = EC_VBNV_CONTEXT_OP_READ;
+
+	msg->version = EC_VER_VBNV_CONTEXT;
+	msg->command = EC_CMD_VBNV_CONTEXT;
+	msg->outsize = para_sz;
+	msg->insize = resp_sz;
+
+	err = cros_ec_cmd_xfer(ecdev, msg);
+	if (err < 0) {
+		dev_err(dev, "Error sending read request: %d\n", err);
+		kfree(msg);
+		return err;
+	}
+
+	memcpy(buf, msg->data, resp_sz);
+
+	kfree(msg);
+	return resp_sz;
+}
+
+static ssize_t vboot_context_write(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *attr, char *buf,
+				   loff_t pos, size_t count)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+	struct cros_ec_device *ecdev = ec->ec_dev;
+	struct ec_params_vbnvcontext *params;
+	struct cros_ec_command *msg;
+	int err;
+	const size_t para_sz = sizeof(*params);
+	const size_t data_sz = sizeof(params->block);
+
+	/* Only write full values */
+	if (count != data_sz)
+		return -EINVAL;
+
+	msg = kmalloc(sizeof(*msg) + para_sz, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	params = (struct ec_params_vbnvcontext *)msg->data;
+	params->op = EC_VBNV_CONTEXT_OP_WRITE;
+	memcpy(params->block, buf, data_sz);
+
+	msg->version = EC_VER_VBNV_CONTEXT;
+	msg->command = EC_CMD_VBNV_CONTEXT;
+	msg->outsize = para_sz;
+	msg->insize = 0;
+
+	err = cros_ec_cmd_xfer(ecdev, msg);
+	if (err < 0) {
+		dev_err(dev, "Error sending write request: %d\n", err);
+		kfree(msg);
+		return err;
+	}
+
+	kfree(msg);
+	return data_sz;
+}
+
+static umode_t cros_ec_vbc_is_visible(struct kobject *kobj,
+				      struct bin_attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+	struct device_node *np = ec->ec_dev->dev->of_node;
+
+	if (IS_ENABLED(CONFIG_OF) && np) {
+		if (of_property_read_bool(np, "google,has-vbc-nvram"))
+			return a->attr.mode;
+	}
+
+	return 0;
+}
+
+static BIN_ATTR_RW(vboot_context, 16);
+
+static struct bin_attribute *cros_ec_vbc_bin_attrs[] = {
+	&bin_attr_vboot_context,
+	NULL
+};
+
+struct attribute_group cros_ec_vbc_attr_group = {
+	.name = "vbc",
+	.bin_attrs = cros_ec_vbc_bin_attrs,
+	.is_bin_visible = cros_ec_vbc_is_visible,
+};
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index da72671a42fa..494682ce4bf3 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -255,5 +255,6 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
 extern struct attribute_group cros_ec_lightbar_attr_group;
+extern struct attribute_group cros_ec_vbc_attr_group;
 
 #endif /* __LINUX_MFD_CROS_EC_H */
-- 
cgit v1.2.3


From edc1b01cd3b20a5fff049e98f82a2b0d24a34c89 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 5 Oct 2015 10:53:49 -0400
Subject: SUNRPC: Move TCP receive data path into a workqueue context

Stream protocols such as TCP can often build up a backlog of data to be
read due to ordering. Combine this with the fact that some workloads such
as NFS read()-intensive workloads need to receive a lot of data per RPC
call, and it turns out that receiving the data from inside a softirq
context can cause starvation.

The following patch moves the TCP data receive into a workqueue context.
We still end up calling tcp_read_sock(), but we do so from a process
context, meaning that softirqs are enabled for most of the time.

With this patch, I see a doubling of read bandwidth when running a
multi-threaded iozone workload between a virtual client and server setup.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xprtsock.h |  2 ++
 net/sunrpc/xprtsock.c           | 51 +++++++++++++++++++++++++++++------------
 2 files changed, 38 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 357e44c1a46b..0ece4ba06f06 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -44,6 +44,8 @@ struct sock_xprt {
 	 */
 	unsigned long		sock_state;
 	struct delayed_work	connect_worker;
+	struct work_struct	recv_worker;
+	struct mutex		recv_mutex;
 	struct sockaddr_storage	srcaddr;
 	unsigned short		srcport;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fa8d0c15c8cd..58dc90ccebb6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -823,6 +823,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 
 	kernel_sock_shutdown(sock, SHUT_RDWR);
 
+	mutex_lock(&transport->recv_mutex);
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;
@@ -833,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	xprt_clear_connected(xprt);
 	write_unlock_bh(&sk->sk_callback_lock);
 	xs_sock_reset_connection_flags(xprt);
+	mutex_unlock(&transport->recv_mutex);
 
 	trace_rpc_socket_close(xprt, sock);
 	sock_release(sock);
@@ -886,6 +888,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	cancel_delayed_work_sync(&transport->connect_worker);
 	xs_close(xprt);
+	cancel_work_sync(&transport->recv_worker);
 	xs_xprt_free(xprt);
 	module_put(THIS_MODULE);
 }
@@ -1243,12 +1246,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
 
 	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->transport_lock);
+	spin_lock_bh(&xprt->transport_lock);
 	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
 	if (!req) {
 		dprintk("RPC:       XID %08x request not found!\n",
 				ntohl(transport->tcp_xid));
-		spin_unlock(&xprt->transport_lock);
+		spin_unlock_bh(&xprt->transport_lock);
 		return -1;
 	}
 
@@ -1257,7 +1260,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_rqst(req->rq_task, transport->tcp_copied);
 
-	spin_unlock(&xprt->transport_lock);
+	spin_unlock_bh(&xprt->transport_lock);
 	return 0;
 }
 
@@ -1277,10 +1280,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 	struct rpc_rqst *req;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->transport_lock);
+	spin_lock_bh(&xprt->transport_lock);
 	req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
 	if (req == NULL) {
-		spin_unlock(&xprt->transport_lock);
+		spin_unlock_bh(&xprt->transport_lock);
 		printk(KERN_WARNING "Callback slot table overflowed\n");
 		xprt_force_disconnect(xprt);
 		return -1;
@@ -1291,7 +1294,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_bc_request(req, transport->tcp_copied);
-	spin_unlock(&xprt->transport_lock);
+	spin_unlock_bh(&xprt->transport_lock);
 
 	return 0;
 }
@@ -1402,19 +1405,33 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
 	unsigned long total = 0;
 	int read = 0;
 
+	mutex_lock(&transport->recv_mutex);
 	sk = transport->inet;
+	if (sk == NULL)
+		goto out;
 
 	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
 	for (;;) {
+		lock_sock(sk);
 		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
+		release_sock(sk);
 		if (read <= 0)
 			break;
 		total += read;
 		rd_desc.count = 65536;
 	}
+out:
+	mutex_unlock(&transport->recv_mutex);
 	trace_xs_tcp_data_ready(xprt, read, total);
 }
 
+static void xs_tcp_data_receive_workfn(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, recv_worker);
+	xs_tcp_data_receive(transport);
+}
+
 /**
  * xs_tcp_data_ready - "data ready" callback for TCP sockets
  * @sk: socket with data to read
@@ -1437,8 +1454,8 @@ static void xs_tcp_data_ready(struct sock *sk)
 	 */
 	if (xprt->reestablish_timeout)
 		xprt->reestablish_timeout = 0;
+	queue_work(rpciod_workqueue, &transport->recv_worker);
 
-	xs_tcp_data_receive(transport);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1840,6 +1857,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
 }
 #endif
 
+static void xs_dummy_data_receive_workfn(struct work_struct *work)
+{
+}
+
 static void xs_dummy_setup_socket(struct work_struct *work)
 {
 }
@@ -2664,6 +2685,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 	}
 
 	new = container_of(xprt, struct sock_xprt, xprt);
+	mutex_init(&new->recv_mutex);
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
 	xprt->addrlen = args->addrlen;
 	if (args->srcaddr)
@@ -2717,6 +2739,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
 	xprt->ops = &xs_local_ops;
 	xprt->timeout = &xs_local_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
 	INIT_DELAYED_WORK(&transport->connect_worker,
 			xs_dummy_setup_socket);
 
@@ -2788,21 +2811,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 	xprt->timeout = &xs_udp_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_dummy_data_receive_workfn);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
@@ -2867,21 +2889,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->ops = &xs_tcp_ops;
 	xprt->timeout = &xs_tcp_default_timeout;
 
+	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
 			xprt_set_bound(xprt);
 
-		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default:
-- 
cgit v1.2.3


From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 20 Sep 2015 16:15:24 -0400
Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server

For symmetry with the synchronous handler, and so that we can potentially
handle errors such as NFS4ERR_BADNAME.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 6 +++---
 include/linux/nfs_xdr.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d044c7b11ff7..ae5cde621954 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -78,7 +78,7 @@ struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *);
+static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *, struct nfs4_state *, long *);
 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
@@ -4982,7 +4982,7 @@ out:
 
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
+nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
 			struct nfs4_state *state, long *timeout)
 {
 	struct nfs_client *clp = server->nfs_client;
@@ -5559,7 +5559,7 @@ struct nfs4_unlockdata {
 	struct nfs4_lock_state *lsp;
 	struct nfs_open_context *ctx;
 	struct file_lock fl;
-	const struct nfs_server *server;
+	struct nfs_server *server;
 	unsigned long timestamp;
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 52faf7e96c65..53f2acc68baf 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -528,7 +528,7 @@ struct nfs4_delegreturnargs {
 struct nfs4_delegreturnres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
-	const struct nfs_server *server;
+	struct nfs_server *server;
 };
 
 /*
@@ -601,7 +601,7 @@ struct nfs_removeargs {
 
 struct nfs_removeres {
 	struct nfs4_sequence_res 	seq_res;
-	const struct nfs_server *server;
+	struct nfs_server *server;
 	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
 };
@@ -619,7 +619,7 @@ struct nfs_renameargs {
 
 struct nfs_renameres {
 	struct nfs4_sequence_res	seq_res;
-	const struct nfs_server		*server;
+	struct nfs_server		*server;
 	struct nfs4_change_info		old_cinfo;
 	struct nfs_fattr		*old_fattr;
 	struct nfs4_change_info		new_cinfo;
-- 
cgit v1.2.3


From 38a1bdc9ff9f6c8cfad228eac5c1ce31ce038b25 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Fri, 19 Jun 2015 15:31:46 +0100
Subject: firmware: arm_scpi: Extend to support sensors

ARM System Control Processor (SCP) provides an API to query and use
the sensors available in the system. Extend the SCPI driver to support
sensor messages.

Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scpi.c   | 60 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/scpi_protocol.h | 17 ++++++++++++
 2 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index cb75c750ca54..6174db80c663 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -219,6 +219,21 @@ struct dvfs_set {
 	u8 index;
 } __packed;
 
+struct sensor_capabilities {
+	__le16 sensors;
+} __packed;
+
+struct _scpi_sensor_info {
+	__le16 sensor_id;
+	u8 class;
+	u8 trigger_type;
+	char name[20];
+};
+
+struct sensor_value {
+	__le32 val;
+} __packed;
+
 static struct scpi_drvinfo *scpi_info;
 
 static int scpi_linux_errmap[SCPI_ERR_MAX] = {
@@ -481,6 +496,48 @@ static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain)
 	return info;
 }
 
+static int scpi_sensor_get_capability(u16 *sensors)
+{
+	struct sensor_capabilities cap_buf;
+	int ret;
+
+	ret = scpi_send_message(SCPI_CMD_SENSOR_CAPABILITIES, NULL, 0, &cap_buf,
+				sizeof(cap_buf));
+	if (!ret)
+		*sensors = le16_to_cpu(cap_buf.sensors);
+
+	return ret;
+}
+
+static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info)
+{
+	__le16 id = cpu_to_le16(sensor_id);
+	struct _scpi_sensor_info _info;
+	int ret;
+
+	ret = scpi_send_message(SCPI_CMD_SENSOR_INFO, &id, sizeof(id),
+				&_info, sizeof(_info));
+	if (!ret) {
+		memcpy(info, &_info, sizeof(*info));
+		info->sensor_id = le16_to_cpu(_info.sensor_id);
+	}
+
+	return ret;
+}
+
+int scpi_sensor_get_value(u16 sensor, u32 *val)
+{
+	struct sensor_value buf;
+	int ret;
+
+	ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &sensor, sizeof(sensor),
+				&buf, sizeof(buf));
+	if (!ret)
+		*val = le32_to_cpu(buf.val);
+
+	return ret;
+}
+
 static struct scpi_ops scpi_ops = {
 	.get_version = scpi_get_version,
 	.clk_get_range = scpi_clk_get_range,
@@ -489,6 +546,9 @@ static struct scpi_ops scpi_ops = {
 	.dvfs_get_idx = scpi_dvfs_get_idx,
 	.dvfs_set_idx = scpi_dvfs_set_idx,
 	.dvfs_get_info = scpi_dvfs_get_info,
+	.sensor_get_capability = scpi_sensor_get_capability,
+	.sensor_get_info = scpi_sensor_get_info,
+	.sensor_get_value = scpi_sensor_get_value,
 };
 
 struct scpi_ops *get_scpi_ops(void)
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index e7169cd54e19..80af3cd35ae4 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -28,6 +28,20 @@ struct scpi_dvfs_info {
 	struct scpi_opp *opps;
 };
 
+enum scpi_sensor_class {
+	TEMPERATURE,
+	VOLTAGE,
+	CURRENT,
+	POWER,
+};
+
+struct scpi_sensor_info {
+	u16 sensor_id;
+	u8 class;
+	u8 trigger_type;
+	char name[20];
+} __packed;
+
 /**
  * struct scpi_ops - represents the various operations provided
  *	by SCP through SCPI message protocol
@@ -52,6 +66,9 @@ struct scpi_ops {
 	int (*dvfs_get_idx)(u8);
 	int (*dvfs_set_idx)(u8, u8);
 	struct scpi_dvfs_info *(*dvfs_get_info)(u8);
+	int (*sensor_get_capability)(u16 *sensors);
+	int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *);
+	int (*sensor_get_value)(u16, u32 *);
 };
 
 #if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
-- 
cgit v1.2.3


From 9a764234eee689ea800424ab99b08ff07a8bdbcd Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 14 Sep 2015 12:09:34 -0700
Subject: soc: brcmstb: Add Bus Interface Unit control setup

Broadcom STB SoCs (brcmstb) require an early setup of their Bus
Interface Unit control register. This needs to happen before SMP is
brought up because it affects how the CPU complex will be interfaced to
the memory controller.

Add support code which properly initializes the BIU registers based on
whether "brcm,write-pairing" is present in Device Tree, and take care of
saving and restoring credit register settings during system-wide
suspend/resume operations.

Acked-by: Gregory Fong <gregory.0xf0@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/soc/brcmstb/Makefile        |   2 +-
 drivers/soc/brcmstb/biuctrl.c       | 116 ++++++++++++++++++++++++++++++++++++
 include/linux/soc/brcmstb/brcmstb.h |  10 ++++
 3 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/brcmstb/biuctrl.c
 create mode 100644 include/linux/soc/brcmstb/brcmstb.h

(limited to 'include/linux')

diff --git a/drivers/soc/brcmstb/Makefile b/drivers/soc/brcmstb/Makefile
index 183280e39f80..9120b2715d3e 100644
--- a/drivers/soc/brcmstb/Makefile
+++ b/drivers/soc/brcmstb/Makefile
@@ -1 +1 @@
-obj-y				+= common.o
+obj-y				+= common.o biuctrl.o
diff --git a/drivers/soc/brcmstb/biuctrl.c b/drivers/soc/brcmstb/biuctrl.c
new file mode 100644
index 000000000000..9049c076f9a1
--- /dev/null
+++ b/drivers/soc/brcmstb/biuctrl.c
@@ -0,0 +1,116 @@
+/*
+ * Broadcom STB SoCs Bus Interface Unit controls
+ *
+ * Copyright (C) 2015, Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)	"brcmstb: " KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/syscore_ops.h>
+
+#define CPU_CREDIT_REG_OFFSET			0x184
+#define  CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK	0x70000000
+
+static void __iomem *cpubiuctrl_base;
+static bool mcp_wr_pairing_en;
+
+static int __init mcp_write_pairing_set(void)
+{
+	u32 creds = 0;
+
+	if (!cpubiuctrl_base)
+		return -1;
+
+	creds = readl_relaxed(cpubiuctrl_base + CPU_CREDIT_REG_OFFSET);
+	if (mcp_wr_pairing_en) {
+		pr_info("MCP: Enabling write pairing\n");
+		writel_relaxed(creds | CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK,
+			     cpubiuctrl_base + CPU_CREDIT_REG_OFFSET);
+	} else if (creds & CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK) {
+		pr_info("MCP: Disabling write pairing\n");
+		writel_relaxed(creds & ~CPU_CREDIT_REG_MCPx_WR_PAIRING_EN_MASK,
+				cpubiuctrl_base + CPU_CREDIT_REG_OFFSET);
+	} else {
+		pr_info("MCP: Write pairing already disabled\n");
+	}
+
+	return 0;
+}
+
+static int __init setup_hifcpubiuctrl_regs(void)
+{
+	struct device_node *np;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
+	if (!np) {
+		pr_err("missing BIU control node\n");
+		return -ENODEV;
+	}
+
+	cpubiuctrl_base = of_iomap(np, 0);
+	if (!cpubiuctrl_base) {
+		pr_err("failed to remap BIU control base\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mcp_wr_pairing_en = of_property_read_bool(np, "brcm,write-pairing");
+out:
+	of_node_put(np);
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static u32 cpu_credit_reg_dump;  /* for save/restore */
+
+static int brcmstb_cpu_credit_reg_suspend(void)
+{
+	if (cpubiuctrl_base)
+		cpu_credit_reg_dump =
+			readl_relaxed(cpubiuctrl_base + CPU_CREDIT_REG_OFFSET);
+	return 0;
+}
+
+static void brcmstb_cpu_credit_reg_resume(void)
+{
+	if (cpubiuctrl_base)
+		writel_relaxed(cpu_credit_reg_dump,
+				cpubiuctrl_base + CPU_CREDIT_REG_OFFSET);
+}
+
+static struct syscore_ops brcmstb_cpu_credit_syscore_ops = {
+	.suspend = brcmstb_cpu_credit_reg_suspend,
+	.resume = brcmstb_cpu_credit_reg_resume,
+};
+#endif
+
+
+void __init brcmstb_biuctrl_init(void)
+{
+	int ret;
+
+	setup_hifcpubiuctrl_regs();
+
+	ret = mcp_write_pairing_set();
+	if (ret) {
+		pr_err("MCP: Unable to disable write pairing!\n");
+		return;
+	}
+
+#ifdef CONFIG_PM_SLEEP
+	register_syscore_ops(&brcmstb_cpu_credit_syscore_ops);
+#endif
+}
diff --git a/include/linux/soc/brcmstb/brcmstb.h b/include/linux/soc/brcmstb/brcmstb.h
new file mode 100644
index 000000000000..337ce414e898
--- /dev/null
+++ b/include/linux/soc/brcmstb/brcmstb.h
@@ -0,0 +1,10 @@
+#ifndef __BRCMSTB_SOC_H
+#define __BRCMSTB_SOC_H
+
+/*
+ * Bus Interface Unit control register setup, must happen early during boot,
+ * before SMP is brought up, called by machine entry point.
+ */
+void brcmstb_biuctrl_init(void);
+
+#endif /* __BRCMSTB_SOC_H */
-- 
cgit v1.2.3


From a639315d6c536c806724c9328941a2517507e3e3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 15 Sep 2015 02:14:03 -0400
Subject: pmem: kill memremap_pmem()

Now that the pmem-api is defined as "a set of apis that enables access
to WB mapped pmem",  the mapping type is implied.  Remove the wrapper
and push the functionality down into the pmem driver in preparation for
adding support for direct-mapped pmem.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pmem.c |  9 +++++----
 include/linux/pmem.h  | 26 +-------------------------
 2 files changed, 6 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 0ba6a978f227..0680affae04a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -157,8 +157,9 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 			return addr;
 		pmem->virt_addr = (void __pmem *) addr;
 	} else {
-		pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
-				pmem->size);
+		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
+				pmem->phys_addr, pmem->size,
+				ARCH_MEMREMAP_PMEM);
 		if (!pmem->virt_addr)
 			return ERR_PTR(-ENXIO);
 	}
@@ -363,8 +364,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 
 	/* establish pfn range for lookup, and switch to direct map */
 	pmem = dev_get_drvdata(dev);
-	memunmap_pmem(dev, pmem->virt_addr);
-	pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res);
+	devm_memunmap(dev, (void __force *) pmem->virt_addr);
+	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
 	if (IS_ERR(pmem->virt_addr)) {
 		rc = PTR_ERR(pmem->virt_addr);
 		goto err;
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 85f810b33917..acfea8ce4a07 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si
 	memcpy(dst, (void __force const *) src, size);
 }
 
-static inline void memunmap_pmem(struct device *dev, void __pmem *addr)
-{
-	devm_memunmap(dev, (void __force *) addr);
-}
-
 static inline bool arch_has_pmem_api(void)
 {
 	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
@@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void)
  * These defaults seek to offer decent performance and minimize the
  * window between i/o completion and writes being durable on media.
  * However, it is undefined / architecture specific whether
- * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
+ * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
  * making data durable relative to i/o completion.
  */
 static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
@@ -116,25 +111,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
 		memset((void __force *)addr, 0, size);
 }
 
-/**
- * memremap_pmem - map physical persistent memory for pmem api
- * @offset: physical address of persistent memory
- * @size: size of the mapping
- *
- * Establish a mapping of the architecture specific memory type expected
- * by memcpy_to_pmem() and wmb_pmem().  For example, it may be
- * the case that an uncacheable or writethrough mapping is sufficient,
- * or a writeback mapping provided memcpy_to_pmem() and
- * wmb_pmem() arrange for the data to be written through the
- * cache to persistent media.
- */
-static inline void __pmem *memremap_pmem(struct device *dev,
-		resource_size_t offset, unsigned long size)
-{
-	return (void __pmem *) devm_memremap(dev, offset, size,
-			ARCH_MEMREMAP_PMEM);
-}
-
 /**
  * memcpy_to_pmem - copy data to persistent memory
  * @dst: destination buffer for the copy
-- 
cgit v1.2.3


From 7c683941f30a977c10ec6be174ec5f16939c7ce5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Oct 2015 20:35:55 -0400
Subject: devm: make allocations numa aware by default

Given we already have a device, just use dev_to_node() to provide a NUMA
node hint for devres allocations.  However, current devres_alloc() users
will need to explicitly opt in with devres_alloc_node().

Reviewed-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/base/devres.c  | 19 ++++++++++---------
 include/linux/device.h | 16 ++++++++++++----
 2 files changed, 22 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 875464690117..8fc654f0807b 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -82,12 +82,12 @@ static struct devres_group * node_to_group(struct devres_node *node)
 }
 
 static __always_inline struct devres * alloc_dr(dr_release_t release,
-						size_t size, gfp_t gfp)
+						size_t size, gfp_t gfp, int nid)
 {
 	size_t tot_size = sizeof(struct devres) + size;
 	struct devres *dr;
 
-	dr = kmalloc_track_caller(tot_size, gfp);
+	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
 	if (unlikely(!dr))
 		return NULL;
 
@@ -106,24 +106,25 @@ static void add_dr(struct device *dev, struct devres_node *node)
 }
 
 #ifdef CONFIG_DEBUG_DEVRES
-void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
+void * __devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid,
 		      const char *name)
 {
 	struct devres *dr;
 
-	dr = alloc_dr(release, size, gfp | __GFP_ZERO);
+	dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid);
 	if (unlikely(!dr))
 		return NULL;
 	set_node_dbginfo(&dr->node, name, size);
 	return dr->data;
 }
-EXPORT_SYMBOL_GPL(__devres_alloc);
+EXPORT_SYMBOL_GPL(__devres_alloc_node);
 #else
 /**
  * devres_alloc - Allocate device resource data
  * @release: Release function devres will be associated with
  * @size: Allocation size
  * @gfp: Allocation flags
+ * @nid: NUMA node
  *
  * Allocate devres of @size bytes.  The allocated area is zeroed, then
  * associated with @release.  The returned pointer can be passed to
@@ -132,16 +133,16 @@ EXPORT_SYMBOL_GPL(__devres_alloc);
  * RETURNS:
  * Pointer to allocated devres on success, NULL on failure.
  */
-void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
+void * devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid)
 {
 	struct devres *dr;
 
-	dr = alloc_dr(release, size, gfp | __GFP_ZERO);
+	dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid);
 	if (unlikely(!dr))
 		return NULL;
 	return dr->data;
 }
-EXPORT_SYMBOL_GPL(devres_alloc);
+EXPORT_SYMBOL_GPL(devres_alloc_node);
 #endif
 
 /**
@@ -776,7 +777,7 @@ void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
 	struct devres *dr;
 
 	/* use raw alloc_dr for kmalloc caller tracing */
-	dr = alloc_dr(devm_kmalloc_release, size, gfp);
+	dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
 	if (unlikely(!dr))
 		return NULL;
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 5d7bc6349930..b8f411b57dcb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res);
 typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data);
 
 #ifdef CONFIG_DEBUG_DEVRES
-extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp,
-			     const char *name);
+extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
+				 int nid, const char *name);
 #define devres_alloc(release, size, gfp) \
-	__devres_alloc(release, size, gfp, #release)
+	__devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release)
+#define devres_alloc_node(release, size, gfp, nid) \
+	__devres_alloc_node(release, size, gfp, nid, #release)
 #else
-extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp);
+extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp,
+			       int nid);
+static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp)
+{
+	return devres_alloc_node(release, size, gfp, NUMA_NO_NODE);
+}
 #endif
+
 extern void devres_for_each_res(struct device *dev, dr_release_t release,
 				dr_match_t match, void *match_data,
 				void (*fn)(struct device *, void *, void *),
-- 
cgit v1.2.3


From e866a2e3950fe2f708d5cc67d641b1725ef7a708 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 1 Oct 2015 23:45:31 +0200
Subject: linux/thermal.h: rename KELVIN_TO_CELSIUS to DECI_KELVIN_TO_CELSIUS

The macros KELVIN_TO_CELSIUS and CELSIUS_TO_KELVIN actually convert
between deciKelvins and Celsius, so rename them to reflect that. While
at it, use a statement expression in DECI_KELVIN_TO_CELSIUS to prevent
expanding the argument multiple times and get rid of a few casts.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/thermal.c              | 12 ++++++------
 drivers/platform/x86/asus-wmi.c     |  2 +-
 drivers/platform/x86/intel_menlow.c |  8 ++++----
 include/linux/thermal.h             |  8 +++++---
 4 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 30d8518b25fb..82707f9824ca 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -315,7 +315,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 			if (crt == -1) {
 				tz->trips.critical.flags.valid = 0;
 			} else if (crt > 0) {
-				unsigned long crt_k = CELSIUS_TO_KELVIN(crt);
+				unsigned long crt_k = CELSIUS_TO_DECI_KELVIN(crt);
 				/*
 				 * Allow override critical threshold
 				 */
@@ -351,7 +351,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 		if (psv == -1) {
 			status = AE_SUPPORT;
 		} else if (psv > 0) {
-			tmp = CELSIUS_TO_KELVIN(psv);
+			tmp = CELSIUS_TO_DECI_KELVIN(psv);
 			status = AE_OK;
 		} else {
 			status = acpi_evaluate_integer(tz->device->handle,
@@ -431,7 +431,7 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 					break;
 				if (i == 1)
 					tz->trips.active[0].temperature =
-						CELSIUS_TO_KELVIN(act);
+						CELSIUS_TO_DECI_KELVIN(act);
 				else
 					/*
 					 * Don't allow override higher than
@@ -439,9 +439,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 					 */
 					tz->trips.active[i - 1].temperature =
 						(tz->trips.active[i - 2].temperature <
-						CELSIUS_TO_KELVIN(act) ?
+						CELSIUS_TO_DECI_KELVIN(act) ?
 						tz->trips.active[i - 2].temperature :
-						CELSIUS_TO_KELVIN(act));
+						CELSIUS_TO_DECI_KELVIN(act));
 				break;
 			} else {
 				tz->trips.active[i].temperature = tmp;
@@ -1105,7 +1105,7 @@ static int acpi_thermal_add(struct acpi_device *device)
 	INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn);
 
 	pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device),
-		acpi_device_bid(device), KELVIN_TO_CELSIUS(tz->temperature));
+		acpi_device_bid(device), DECI_KELVIN_TO_CELSIUS(tz->temperature));
 	goto end;
 
 free_memory:
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index efbc3f0c592b..bb80f7a29496 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -1318,7 +1318,7 @@ static ssize_t asus_hwmon_temp1(struct device *dev,
 	if (err < 0)
 		return err;
 
-	value = KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000;
+	value = DECI_KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000;
 
 	return sprintf(buf, "%d\n", value);
 }
diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index e8b46d2c468c..0a919d81662c 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c
@@ -315,7 +315,7 @@ static ssize_t aux0_show(struct device *dev,
 
 	result = sensor_get_auxtrip(attr->handle, 0, &value);
 
-	return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value));
+	return result ? result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value));
 }
 
 static ssize_t aux1_show(struct device *dev,
@@ -327,7 +327,7 @@ static ssize_t aux1_show(struct device *dev,
 
 	result = sensor_get_auxtrip(attr->handle, 1, &value);
 
-	return result ? result : sprintf(buf, "%lu", KELVIN_TO_CELSIUS(value));
+	return result ? result : sprintf(buf, "%lu", DECI_KELVIN_TO_CELSIUS(value));
 }
 
 static ssize_t aux0_store(struct device *dev,
@@ -345,7 +345,7 @@ static ssize_t aux0_store(struct device *dev,
 	if (value < 0)
 		return -EINVAL;
 
-	result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_KELVIN(value));
+	result = sensor_set_auxtrip(attr->handle, 0, CELSIUS_TO_DECI_KELVIN(value));
 	return result ? result : count;
 }
 
@@ -364,7 +364,7 @@ static ssize_t aux1_store(struct device *dev,
 	if (value < 0)
 		return -EINVAL;
 
-	result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_KELVIN(value));
+	result = sensor_set_auxtrip(attr->handle, 1, CELSIUS_TO_DECI_KELVIN(value));
 	return result ? result : count;
 }
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 157d366e761b..4014a59828fc 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -44,9 +44,11 @@
 #define THERMAL_WEIGHT_DEFAULT 0
 
 /* Unit conversion macros */
-#define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
-				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
-#define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+#define DECI_KELVIN_TO_CELSIUS(t)	({			\
+	long _t = (t);						\
+	((_t-2732 >= 0) ? (_t-2732+5)/10 : (_t-2732-5)/10);	\
+})
+#define CELSIUS_TO_DECI_KELVIN(t)	((t)*10+2732)
 #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
 #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
 #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
-- 
cgit v1.2.3


From 0ad95472bf169a3501991f8f33f5147f792a8116 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Wed, 23 Sep 2015 15:49:29 +0300
Subject: lockd: create NSM handles per net namespace

Commit cb7323fffa85 ("lockd: create and use per-net NSM
 RPC clients on MON/UNMON requests") introduced per-net
NSM RPC clients. Unfortunately this doesn't make any sense
without per-net nsm_handle.

E.g. the following scenario could happen:
Two hosts (X and Y) in different namespaces (A and B) share
the same nsm struct.

1. nsm_monitor(host_X) called => NSM rpc client created,
	nsm->sm_monitored bit set.
2. nsm_monitor(host_Y) called => nsm->sm_monitored already set,
	we just exit. Thus in namespace B ln->nsm_clnt == NULL.
3. host X destroyed => nsm->sm_count decremented to 1
4. host Y destroyed => nsm_unmonitor() => nsm_mon_unmon() => NULL-ptr
	dereference of *ln->nsm_clnt

So this could be fixed by making the nsm_handles list per-net
instead of global. Thus different net namespaces will not be able
to share the same nsm_handle.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/host.c             |  7 ++++---
 fs/lockd/mon.c              | 36 ++++++++++++++++++++++--------------
 fs/lockd/netns.h            |  1 +
 fs/lockd/svc.c              |  1 +
 fs/lockd/svc4proc.c         |  2 +-
 fs/lockd/svcproc.c          |  2 +-
 include/linux/lockd/lockd.h |  9 ++++++---
 7 files changed, 36 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 969d589c848d..b5f3c3ab0d5f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 		atomic_inc(&nsm->sm_count);
 	else {
 		host = NULL;
-		nsm = nsm_get_handle(ni->sap, ni->salen,
+		nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
 					ni->hostname, ni->hostname_len);
 		if (unlikely(nsm == NULL)) {
 			dprintk("lockd: %s failed; no nsm handle\n",
@@ -534,17 +534,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
 
 /**
  * nlm_host_rebooted - Release all resources held by rebooted host
+ * @net:  network namespace
  * @info: pointer to decoded results of NLM_SM_NOTIFY call
  *
  * We were notified that the specified host has rebooted.  Release
  * all resources held by that peer.
  */
-void nlm_host_rebooted(const struct nlm_reboot *info)
+void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info)
 {
 	struct nsm_handle *nsm;
 	struct nlm_host	*host;
 
-	nsm = nsm_reboot_lookup(info);
+	nsm = nsm_reboot_lookup(net, info);
 	if (unlikely(nsm == NULL))
 		return;
 
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 47a32b6d9b90..6c05cd17e520 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -51,7 +51,6 @@ struct nsm_res {
 };
 
 static const struct rpc_program	nsm_program;
-static				LIST_HEAD(nsm_handles);
 static				DEFINE_SPINLOCK(nsm_lock);
 
 /*
@@ -264,33 +263,35 @@ void nsm_unmonitor(const struct nlm_host *host)
 	}
 }
 
-static struct nsm_handle *nsm_lookup_hostname(const char *hostname,
-					      const size_t len)
+static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles,
+					const char *hostname, const size_t len)
 {
 	struct nsm_handle *nsm;
 
-	list_for_each_entry(nsm, &nsm_handles, sm_link)
+	list_for_each_entry(nsm, nsm_handles, sm_link)
 		if (strlen(nsm->sm_name) == len &&
 		    memcmp(nsm->sm_name, hostname, len) == 0)
 			return nsm;
 	return NULL;
 }
 
-static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
+static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles,
+					const struct sockaddr *sap)
 {
 	struct nsm_handle *nsm;
 
-	list_for_each_entry(nsm, &nsm_handles, sm_link)
+	list_for_each_entry(nsm, nsm_handles, sm_link)
 		if (rpc_cmp_addr(nsm_addr(nsm), sap))
 			return nsm;
 	return NULL;
 }
 
-static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
+static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles,
+					const struct nsm_private *priv)
 {
 	struct nsm_handle *nsm;
 
-	list_for_each_entry(nsm, &nsm_handles, sm_link)
+	list_for_each_entry(nsm, nsm_handles, sm_link)
 		if (memcmp(nsm->sm_priv.data, priv->data,
 					sizeof(priv->data)) == 0)
 			return nsm;
@@ -353,6 +354,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
 
 /**
  * nsm_get_handle - Find or create a cached nsm_handle
+ * @net: network namespace
  * @sap: pointer to socket address of handle to find
  * @salen: length of socket address
  * @hostname: pointer to C string containing hostname to find
@@ -365,11 +367,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
  * @hostname cannot be found in the handle cache.  Returns NULL if
  * an error occurs.
  */
-struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+struct nsm_handle *nsm_get_handle(const struct net *net,
+				  const struct sockaddr *sap,
 				  const size_t salen, const char *hostname,
 				  const size_t hostname_len)
 {
 	struct nsm_handle *cached, *new = NULL;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
 		if (printk_ratelimit()) {
@@ -384,9 +388,10 @@ retry:
 	spin_lock(&nsm_lock);
 
 	if (nsm_use_hostnames && hostname != NULL)
-		cached = nsm_lookup_hostname(hostname, hostname_len);
+		cached = nsm_lookup_hostname(&ln->nsm_handles,
+					hostname, hostname_len);
 	else
-		cached = nsm_lookup_addr(sap);
+		cached = nsm_lookup_addr(&ln->nsm_handles, sap);
 
 	if (cached != NULL) {
 		atomic_inc(&cached->sm_count);
@@ -400,7 +405,7 @@ retry:
 	}
 
 	if (new != NULL) {
-		list_add(&new->sm_link, &nsm_handles);
+		list_add(&new->sm_link, &ln->nsm_handles);
 		spin_unlock(&nsm_lock);
 		dprintk("lockd: created nsm_handle for %s (%s)\n",
 				new->sm_name, new->sm_addrbuf);
@@ -417,19 +422,22 @@ retry:
 
 /**
  * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle
+ * @net:  network namespace
  * @info: pointer to NLMPROC_SM_NOTIFY arguments
  *
  * Returns a matching nsm_handle if found in the nsm cache. The returned
  * nsm_handle's reference count is bumped. Otherwise returns NULL if some
  * error occurred.
  */
-struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info)
+struct nsm_handle *nsm_reboot_lookup(const struct net *net,
+				const struct nlm_reboot *info)
 {
 	struct nsm_handle *cached;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	spin_lock(&nsm_lock);
 
-	cached = nsm_lookup_priv(&info->priv);
+	cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv);
 	if (unlikely(cached == NULL)) {
 		spin_unlock(&nsm_lock);
 		dprintk("lockd: never saw rebooted peer '%.*s' before\n",
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 097bfa3adb1c..89fe011b1335 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -15,6 +15,7 @@ struct lockd_net {
 	spinlock_t nsm_clnt_lock;
 	unsigned int nsm_users;
 	struct rpc_clnt *nsm_clnt;
+	struct list_head nsm_handles;
 };
 
 extern int lockd_net_id;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index d678bcc3cbcb..0dff13f41808 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -593,6 +593,7 @@ static int lockd_init_net(struct net *net)
 	INIT_LIST_HEAD(&ln->lockd_manager.list);
 	ln->lockd_manager.block_opens = false;
 	spin_lock_init(&ln->nsm_clnt_lock);
+	INIT_LIST_HEAD(&ln->nsm_handles);
 	return 0;
 }
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b147d1ae71fd..09c576f26c7b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 		return rpc_system_err;
 	}
 
-	nlm_host_rebooted(argp);
+	nlm_host_rebooted(SVC_NET(rqstp), argp);
 	return rpc_success;
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 21171f0c6477..fb26b9f522e7 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 		return rpc_system_err;
 	}
 
-	nlm_host_rebooted(argp);
+	nlm_host_rebooted(SVC_NET(rqstp), argp);
 	return rpc_success;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ff82a32871b5..fd3b65bf51b5 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -235,7 +235,8 @@ void		  nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 void		  nlm_shutdown_hosts_net(struct net *net);
-void		  nlm_host_rebooted(const struct nlm_reboot *);
+void		  nlm_host_rebooted(const struct net *net,
+					const struct nlm_reboot *);
 
 /*
  * Host monitoring
@@ -243,11 +244,13 @@ void		  nlm_host_rebooted(const struct nlm_reboot *);
 int		  nsm_monitor(const struct nlm_host *host);
 void		  nsm_unmonitor(const struct nlm_host *host);
 
-struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+struct nsm_handle *nsm_get_handle(const struct net *net,
+					const struct sockaddr *sap,
 					const size_t salen,
 					const char *hostname,
 					const size_t hostname_len);
-struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info);
+struct nsm_handle *nsm_reboot_lookup(const struct net *net,
+					const struct nlm_reboot *info);
 void		  nsm_release(struct nsm_handle *nsm);
 
 /*
-- 
cgit v1.2.3


From 870823e629ea194e6cf8e82a9694ac62cad49512 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Oct 2015 15:32:37 +0200
Subject: configfs: add show and store methods to struct configfs_attribute

Add methods to struct configfs_attribute to directly show and store
attributes without adding boilerplate code to every user.  In addition
to the methods this also adds 3 helper macros to define read/write,
read-only and write-only attributes with a single line of code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 fs/configfs/file.c       | 17 ++++++++++++-----
 include/linux/configfs.h | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 403269ffcdf3..106ca589e90a 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -74,7 +74,11 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
 	if (!buffer->page)
 		return -ENOMEM;
 
-	count = ops->show_attribute(item,attr,buffer->page);
+	if (ops->show_attribute)
+		count = ops->show_attribute(item, attr, buffer->page);
+	else
+		count = attr->show(item, buffer->page);
+
 	buffer->needs_read_fill = 0;
 	BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
 	if (count >= 0)
@@ -173,7 +177,9 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size
 	struct config_item * item = to_item(dentry->d_parent);
 	struct configfs_item_operations * ops = buffer->ops;
 
-	return ops->store_attribute(item,attr,buffer->page,count);
+	if (ops->store_attribute)
+		return ops->store_attribute(item, attr, buffer->page, count);
+	return attr->store(item, buffer->page, count);
 }
 
 
@@ -237,8 +243,8 @@ static int check_perm(struct inode * inode, struct file * file)
 	 * and we must have a store method.
 	 */
 	if (file->f_mode & FMODE_WRITE) {
-
-		if (!(inode->i_mode & S_IWUGO) || !ops->store_attribute)
+		if (!(inode->i_mode & S_IWUGO) ||
+		    (!ops->store_attribute && !attr->store))
 			goto Eaccess;
 
 	}
@@ -248,7 +254,8 @@ static int check_perm(struct inode * inode, struct file * file)
 	 * must be a show method for it.
 	 */
 	if (file->f_mode & FMODE_READ) {
-		if (!(inode->i_mode & S_IRUGO) || !ops->show_attribute)
+		if (!(inode->i_mode & S_IRUGO) ||
+		    (!ops->show_attribute && !attr->show))
 			goto Eaccess;
 	}
 
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 63a36e89d0eb..85e9956a86de 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -125,8 +125,35 @@ struct configfs_attribute {
 	const char		*ca_name;
 	struct module 		*ca_owner;
 	umode_t			ca_mode;
+	ssize_t (*show)(struct config_item *, char *);
+	ssize_t (*store)(struct config_item *, const char *, size_t);
 };
 
+#define CONFIGFS_ATTR(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO | S_IWUSR,		\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+	.store		= _pfx##_name##_store,		\
+}
+
+#define CONFIGFS_ATTR_RO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO,			\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+}
+
+#define CONFIGFS_ATTR_WO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IWUSR,			\
+	.ca_owner	= THIS_MODULE,			\
+	.store		= _pfx##_name##_store,		\
+}
+
 /*
  * Users often need to create attribute structures for their configurable
  * attributes, containing a configfs_attribute member and function pointers
-- 
cgit v1.2.3


From 45b6a73f62ebcf3ff067895fb8030e67f4c7b67f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Oct 2015 15:32:38 +0200
Subject: usb-gadget: use per-attribute show and store methods

To simplify the configfs interface and remove boilerplate code that also
causes binary bloat.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/usb/gadget/configfs.c       | 295 ++++++++++++++----------------------
 include/linux/usb/gadget_configfs.h |  19 +--
 2 files changed, 118 insertions(+), 196 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 294eb74fb078..163d305e1200 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -64,6 +64,11 @@ struct gadget_info {
 	char qw_sign[OS_STRING_QW_SIGN_LEN];
 };
 
+static inline struct gadget_info *to_gadget_info(struct config_item *item)
+{
+	 return container_of(to_config_group(item), struct gadget_info, group);
+}
+
 struct config_usb_cfg {
 	struct config_group group;
 	struct config_group strings_group;
@@ -74,6 +79,12 @@ struct config_usb_cfg {
 	struct usb_gadget_strings *gstrings[MAX_USB_STRING_LANGS + 1];
 };
 
+static inline struct config_usb_cfg *to_config_usb_cfg(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct config_usb_cfg,
+			group);
+}
+
 struct gadget_strings {
 	struct usb_gadget_strings stringtab_dev;
 	struct usb_string strings[USB_GADGET_FIRST_AVAIL_IDX];
@@ -117,32 +128,25 @@ static int usb_string_copy(const char *s, char **s_copy)
 	return 0;
 }
 
-CONFIGFS_ATTR_STRUCT(gadget_info);
-CONFIGFS_ATTR_STRUCT(config_usb_cfg);
-
-#define GI_DEVICE_DESC_ITEM_ATTR(name)	\
-	static struct gadget_info_attribute gadget_cdev_desc_##name = \
-		__CONFIGFS_ATTR(name,  S_IRUGO | S_IWUSR,		\
-				gadget_dev_desc_##name##_show,		\
-				gadget_dev_desc_##name##_store)
-
 #define GI_DEVICE_DESC_SIMPLE_R_u8(__name)	\
-	static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \
+static ssize_t gadget_dev_desc_##__name##_show(struct config_item *item, \
 			char *page)	\
 {	\
-	return sprintf(page, "0x%02x\n", gi->cdev.desc.__name);	\
+	return sprintf(page, "0x%02x\n", \
+		to_gadget_info(item)->cdev.desc.__name); \
 }
 
 #define GI_DEVICE_DESC_SIMPLE_R_u16(__name)	\
-	static ssize_t gadget_dev_desc_##__name##_show(struct gadget_info *gi, \
+static ssize_t gadget_dev_desc_##__name##_show(struct config_item *item, \
 			char *page)	\
 {	\
-	return sprintf(page, "0x%04x\n", le16_to_cpup(&gi->cdev.desc.__name)); \
+	return sprintf(page, "0x%04x\n", \
+		le16_to_cpup(&to_gadget_info(item)->cdev.desc.__name)); \
 }
 
 
 #define GI_DEVICE_DESC_SIMPLE_W_u8(_name)		\
-	static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \
+static ssize_t gadget_dev_desc_##_name##_store(struct config_item *item, \
 		const char *page, size_t len)		\
 {							\
 	u8 val;						\
@@ -150,12 +154,12 @@ CONFIGFS_ATTR_STRUCT(config_usb_cfg);
 	ret = kstrtou8(page, 0, &val);			\
 	if (ret)					\
 		return ret;				\
-	gi->cdev.desc._name = val;			\
+	to_gadget_info(item)->cdev.desc._name = val;	\
 	return len;					\
 }
 
 #define GI_DEVICE_DESC_SIMPLE_W_u16(_name)	\
-	static ssize_t gadget_dev_desc_##_name##_store(struct gadget_info *gi, \
+static ssize_t gadget_dev_desc_##_name##_store(struct config_item *item, \
 		const char *page, size_t len)		\
 {							\
 	u16 val;					\
@@ -163,7 +167,7 @@ CONFIGFS_ATTR_STRUCT(config_usb_cfg);
 	ret = kstrtou16(page, 0, &val);			\
 	if (ret)					\
 		return ret;				\
-	gi->cdev.desc._name = cpu_to_le16p(&val);	\
+	to_gadget_info(item)->cdev.desc._name = cpu_to_le16p(&val);	\
 	return len;					\
 }
 
@@ -193,7 +197,7 @@ static ssize_t is_valid_bcd(u16 bcd_val)
 	return 0;
 }
 
-static ssize_t gadget_dev_desc_bcdDevice_store(struct gadget_info *gi,
+static ssize_t gadget_dev_desc_bcdDevice_store(struct config_item *item,
 		const char *page, size_t len)
 {
 	u16 bcdDevice;
@@ -206,11 +210,11 @@ static ssize_t gadget_dev_desc_bcdDevice_store(struct gadget_info *gi,
 	if (ret)
 		return ret;
 
-	gi->cdev.desc.bcdDevice = cpu_to_le16(bcdDevice);
+	to_gadget_info(item)->cdev.desc.bcdDevice = cpu_to_le16(bcdDevice);
 	return len;
 }
 
-static ssize_t gadget_dev_desc_bcdUSB_store(struct gadget_info *gi,
+static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item,
 		const char *page, size_t len)
 {
 	u16 bcdUSB;
@@ -223,13 +227,13 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct gadget_info *gi,
 	if (ret)
 		return ret;
 
-	gi->cdev.desc.bcdUSB = cpu_to_le16(bcdUSB);
+	to_gadget_info(item)->cdev.desc.bcdUSB = cpu_to_le16(bcdUSB);
 	return len;
 }
 
-static ssize_t gadget_dev_desc_UDC_show(struct gadget_info *gi, char *page)
+static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page)
 {
-	return sprintf(page, "%s\n", gi->udc_name ?: "");
+	return sprintf(page, "%s\n", to_gadget_info(item)->udc_name ?: "");
 }
 
 static int unregister_gadget(struct gadget_info *gi)
@@ -247,9 +251,10 @@ static int unregister_gadget(struct gadget_info *gi)
 	return 0;
 }
 
-static ssize_t gadget_dev_desc_UDC_store(struct gadget_info *gi,
+static ssize_t gadget_dev_desc_UDC_store(struct config_item *item,
 		const char *page, size_t len)
 {
+	struct gadget_info *gi = to_gadget_info(item);
 	char *name;
 	int ret;
 
@@ -283,34 +288,29 @@ err:
 	return ret;
 }
 
-GI_DEVICE_DESC_ITEM_ATTR(bDeviceClass);
-GI_DEVICE_DESC_ITEM_ATTR(bDeviceSubClass);
-GI_DEVICE_DESC_ITEM_ATTR(bDeviceProtocol);
-GI_DEVICE_DESC_ITEM_ATTR(bMaxPacketSize0);
-GI_DEVICE_DESC_ITEM_ATTR(idVendor);
-GI_DEVICE_DESC_ITEM_ATTR(idProduct);
-GI_DEVICE_DESC_ITEM_ATTR(bcdDevice);
-GI_DEVICE_DESC_ITEM_ATTR(bcdUSB);
-GI_DEVICE_DESC_ITEM_ATTR(UDC);
+CONFIGFS_ATTR(gadget_dev_desc_, bDeviceClass);
+CONFIGFS_ATTR(gadget_dev_desc_, bDeviceSubClass);
+CONFIGFS_ATTR(gadget_dev_desc_, bDeviceProtocol);
+CONFIGFS_ATTR(gadget_dev_desc_, bMaxPacketSize0);
+CONFIGFS_ATTR(gadget_dev_desc_, idVendor);
+CONFIGFS_ATTR(gadget_dev_desc_, idProduct);
+CONFIGFS_ATTR(gadget_dev_desc_, bcdDevice);
+CONFIGFS_ATTR(gadget_dev_desc_, bcdUSB);
+CONFIGFS_ATTR(gadget_dev_desc_, UDC);
 
 static struct configfs_attribute *gadget_root_attrs[] = {
-	&gadget_cdev_desc_bDeviceClass.attr,
-	&gadget_cdev_desc_bDeviceSubClass.attr,
-	&gadget_cdev_desc_bDeviceProtocol.attr,
-	&gadget_cdev_desc_bMaxPacketSize0.attr,
-	&gadget_cdev_desc_idVendor.attr,
-	&gadget_cdev_desc_idProduct.attr,
-	&gadget_cdev_desc_bcdDevice.attr,
-	&gadget_cdev_desc_bcdUSB.attr,
-	&gadget_cdev_desc_UDC.attr,
+	&gadget_dev_desc_attr_bDeviceClass,
+	&gadget_dev_desc_attr_bDeviceSubClass,
+	&gadget_dev_desc_attr_bDeviceProtocol,
+	&gadget_dev_desc_attr_bMaxPacketSize0,
+	&gadget_dev_desc_attr_idVendor,
+	&gadget_dev_desc_attr_idProduct,
+	&gadget_dev_desc_attr_bcdDevice,
+	&gadget_dev_desc_attr_bcdUSB,
+	&gadget_dev_desc_attr_UDC,
 	NULL,
 };
 
-static inline struct gadget_info *to_gadget_info(struct config_item *item)
-{
-	 return container_of(to_config_group(item), struct gadget_info, group);
-}
-
 static inline struct gadget_strings *to_gadget_strings(struct config_item *item)
 {
 	 return container_of(to_config_group(item), struct gadget_strings,
@@ -324,12 +324,6 @@ static inline struct gadget_config_name *to_gadget_config_name(
 			 group);
 }
 
-static inline struct config_usb_cfg *to_config_usb_cfg(struct config_item *item)
-{
-	return container_of(to_config_group(item), struct config_usb_cfg,
-			group);
-}
-
 static inline struct usb_function_instance *to_usb_function_instance(
 		struct config_item *item)
 {
@@ -348,12 +342,8 @@ static void gadget_info_attr_release(struct config_item *item)
 	kfree(gi);
 }
 
-CONFIGFS_ATTR_OPS(gadget_info);
-
 static struct configfs_item_operations gadget_root_item_ops = {
 	.release                = gadget_info_attr_release,
-	.show_attribute         = gadget_info_attr_show,
-	.store_attribute        = gadget_info_attr_store,
 };
 
 static void gadget_config_attr_release(struct config_item *item)
@@ -454,24 +444,20 @@ static int config_usb_cfg_unlink(
 	return 0;
 }
 
-CONFIGFS_ATTR_OPS(config_usb_cfg);
-
 static struct configfs_item_operations gadget_config_item_ops = {
 	.release                = gadget_config_attr_release,
-	.show_attribute         = config_usb_cfg_attr_show,
-	.store_attribute        = config_usb_cfg_attr_store,
 	.allow_link             = config_usb_cfg_link,
 	.drop_link              = config_usb_cfg_unlink,
 };
 
 
-static ssize_t gadget_config_desc_MaxPower_show(struct config_usb_cfg *cfg,
+static ssize_t gadget_config_desc_MaxPower_show(struct config_item *item,
 		char *page)
 {
-	return sprintf(page, "%u\n", cfg->c.MaxPower);
+	return sprintf(page, "%u\n", to_config_usb_cfg(item)->c.MaxPower);
 }
 
-static ssize_t gadget_config_desc_MaxPower_store(struct config_usb_cfg *cfg,
+static ssize_t gadget_config_desc_MaxPower_store(struct config_item *item,
 		const char *page, size_t len)
 {
 	u16 val;
@@ -481,17 +467,18 @@ static ssize_t gadget_config_desc_MaxPower_store(struct config_usb_cfg *cfg,
 		return ret;
 	if (DIV_ROUND_UP(val, 8) > 0xff)
 		return -ERANGE;
-	cfg->c.MaxPower = val;
+	to_config_usb_cfg(item)->c.MaxPower = val;
 	return len;
 }
 
-static ssize_t gadget_config_desc_bmAttributes_show(struct config_usb_cfg *cfg,
+static ssize_t gadget_config_desc_bmAttributes_show(struct config_item *item,
 		char *page)
 {
-	return sprintf(page, "0x%02x\n", cfg->c.bmAttributes);
+	return sprintf(page, "0x%02x\n",
+		to_config_usb_cfg(item)->c.bmAttributes);
 }
 
-static ssize_t gadget_config_desc_bmAttributes_store(struct config_usb_cfg *cfg,
+static ssize_t gadget_config_desc_bmAttributes_store(struct config_item *item,
 		const char *page, size_t len)
 {
 	u8 val;
@@ -504,22 +491,16 @@ static ssize_t gadget_config_desc_bmAttributes_store(struct config_usb_cfg *cfg,
 	if (val & ~(USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER |
 				USB_CONFIG_ATT_WAKEUP))
 		return -EINVAL;
-	cfg->c.bmAttributes = val;
+	to_config_usb_cfg(item)->c.bmAttributes = val;
 	return len;
 }
 
-#define CFG_CONFIG_DESC_ITEM_ATTR(name)	\
-	static struct config_usb_cfg_attribute gadget_usb_cfg_##name = \
-		__CONFIGFS_ATTR(name,  S_IRUGO | S_IWUSR,		\
-				gadget_config_desc_##name##_show,	\
-				gadget_config_desc_##name##_store)
-
-CFG_CONFIG_DESC_ITEM_ATTR(MaxPower);
-CFG_CONFIG_DESC_ITEM_ATTR(bmAttributes);
+CONFIGFS_ATTR(gadget_config_desc_, MaxPower);
+CONFIGFS_ATTR(gadget_config_desc_, bmAttributes);
 
 static struct configfs_attribute *gadget_config_attrs[] = {
-	&gadget_usb_cfg_MaxPower.attr,
-	&gadget_usb_cfg_bmAttributes.attr,
+	&gadget_config_desc_attr_MaxPower,
+	&gadget_config_desc_attr_bmAttributes,
 	NULL,
 };
 
@@ -616,11 +597,10 @@ static struct config_item_type functions_type = {
 	.ct_owner       = THIS_MODULE,
 };
 
-CONFIGFS_ATTR_STRUCT(gadget_config_name);
 GS_STRINGS_RW(gadget_config_name, configuration);
 
 static struct configfs_attribute *gadget_config_name_langid_attrs[] = {
-	&gadget_config_name_configuration.attr,
+	&gadget_config_name_attr_configuration,
 	NULL,
 };
 
@@ -719,15 +699,14 @@ static struct config_item_type config_desc_type = {
 	.ct_owner       = THIS_MODULE,
 };
 
-CONFIGFS_ATTR_STRUCT(gadget_strings);
 GS_STRINGS_RW(gadget_strings, manufacturer);
 GS_STRINGS_RW(gadget_strings, product);
 GS_STRINGS_RW(gadget_strings, serialnumber);
 
 static struct configfs_attribute *gadget_strings_langid_attrs[] = {
-	&gadget_strings_manufacturer.attr,
-	&gadget_strings_product.attr,
-	&gadget_strings_serialnumber.attr,
+	&gadget_strings_attr_manufacturer,
+	&gadget_strings_attr_product,
+	&gadget_strings_attr_serialnumber,
 	NULL,
 };
 
@@ -751,27 +730,25 @@ static inline struct os_desc *to_os_desc(struct config_item *item)
 	return container_of(to_config_group(item), struct os_desc, group);
 }
 
-CONFIGFS_ATTR_STRUCT(os_desc);
-CONFIGFS_ATTR_OPS(os_desc);
-
-static ssize_t os_desc_use_show(struct os_desc *os_desc, char *page)
+static inline struct gadget_info *os_desc_item_to_gadget_info(
+		struct config_item *item)
 {
-	struct gadget_info *gi;
-
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
+	return to_gadget_info(to_os_desc(item)->group.cg_item.ci_parent);
+}
 
-	return sprintf(page, "%d", gi->use_os_desc);
+static ssize_t os_desc_use_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%d",
+			os_desc_item_to_gadget_info(item)->use_os_desc);
 }
 
-static ssize_t os_desc_use_store(struct os_desc *os_desc, const char *page,
+static ssize_t os_desc_use_store(struct config_item *item, const char *page,
 				 size_t len)
 {
-	struct gadget_info *gi;
+	struct gadget_info *gi = os_desc_item_to_gadget_info(item);
 	int ret;
 	bool use;
 
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
-
 	mutex_lock(&gi->lock);
 	ret = strtobool(page, &use);
 	if (!ret) {
@@ -783,29 +760,19 @@ static ssize_t os_desc_use_store(struct os_desc *os_desc, const char *page,
 	return ret;
 }
 
-static struct os_desc_attribute os_desc_use =
-	__CONFIGFS_ATTR(use, S_IRUGO | S_IWUSR,
-			os_desc_use_show,
-			os_desc_use_store);
-
-static ssize_t os_desc_b_vendor_code_show(struct os_desc *os_desc, char *page)
+static ssize_t os_desc_b_vendor_code_show(struct config_item *item, char *page)
 {
-	struct gadget_info *gi;
-
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
-
-	return sprintf(page, "%d", gi->b_vendor_code);
+	return sprintf(page, "%d",
+			os_desc_item_to_gadget_info(item)->b_vendor_code);
 }
 
-static ssize_t os_desc_b_vendor_code_store(struct os_desc *os_desc,
+static ssize_t os_desc_b_vendor_code_store(struct config_item *item,
 					   const char *page, size_t len)
 {
-	struct gadget_info *gi;
+	struct gadget_info *gi = os_desc_item_to_gadget_info(item);
 	int ret;
 	u8 b_vendor_code;
 
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
-
 	mutex_lock(&gi->lock);
 	ret = kstrtou8(page, 0, &b_vendor_code);
 	if (!ret) {
@@ -817,29 +784,20 @@ static ssize_t os_desc_b_vendor_code_store(struct os_desc *os_desc,
 	return ret;
 }
 
-static struct os_desc_attribute os_desc_b_vendor_code =
-	__CONFIGFS_ATTR(b_vendor_code, S_IRUGO | S_IWUSR,
-			os_desc_b_vendor_code_show,
-			os_desc_b_vendor_code_store);
-
-static ssize_t os_desc_qw_sign_show(struct os_desc *os_desc, char *page)
+static ssize_t os_desc_qw_sign_show(struct config_item *item, char *page)
 {
-	struct gadget_info *gi;
-
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
+	struct gadget_info *gi = os_desc_item_to_gadget_info(item);
 
 	memcpy(page, gi->qw_sign, OS_STRING_QW_SIGN_LEN);
-
 	return OS_STRING_QW_SIGN_LEN;
 }
 
-static ssize_t os_desc_qw_sign_store(struct os_desc *os_desc, const char *page,
+static ssize_t os_desc_qw_sign_store(struct config_item *item, const char *page,
 				     size_t len)
 {
-	struct gadget_info *gi;
+	struct gadget_info *gi = os_desc_item_to_gadget_info(item);
 	int res, l;
 
-	gi = to_gadget_info(os_desc->group.cg_item.ci_parent);
 	l = min((int)len, OS_STRING_QW_SIGN_LEN >> 1);
 	if (page[l - 1] == '\n')
 		--l;
@@ -855,15 +813,14 @@ static ssize_t os_desc_qw_sign_store(struct os_desc *os_desc, const char *page,
 	return res;
 }
 
-static struct os_desc_attribute os_desc_qw_sign =
-	__CONFIGFS_ATTR(qw_sign, S_IRUGO | S_IWUSR,
-			os_desc_qw_sign_show,
-			os_desc_qw_sign_store);
+CONFIGFS_ATTR(os_desc_, use);
+CONFIGFS_ATTR(os_desc_, b_vendor_code);
+CONFIGFS_ATTR(os_desc_, qw_sign);
 
 static struct configfs_attribute *os_desc_attrs[] = {
-	&os_desc_use.attr,
-	&os_desc_b_vendor_code.attr,
-	&os_desc_qw_sign.attr,
+	&os_desc_attr_use,
+	&os_desc_attr_b_vendor_code,
+	&os_desc_attr_qw_sign,
 	NULL,
 };
 
@@ -926,8 +883,6 @@ static int os_desc_unlink(struct config_item *os_desc_ci,
 
 static struct configfs_item_operations os_desc_ops = {
 	.release                = os_desc_attr_release,
-	.show_attribute         = os_desc_attr_show,
-	.store_attribute        = os_desc_attr_store,
 	.allow_link		= os_desc_link,
 	.drop_link		= os_desc_unlink,
 };
@@ -938,28 +893,21 @@ static struct config_item_type os_desc_type = {
 	.ct_owner	= THIS_MODULE,
 };
 
-CONFIGFS_ATTR_STRUCT(usb_os_desc);
-CONFIGFS_ATTR_OPS(usb_os_desc);
-
-
 static inline struct usb_os_desc_ext_prop
 *to_usb_os_desc_ext_prop(struct config_item *item)
 {
 	return container_of(item, struct usb_os_desc_ext_prop, item);
 }
 
-CONFIGFS_ATTR_STRUCT(usb_os_desc_ext_prop);
-CONFIGFS_ATTR_OPS(usb_os_desc_ext_prop);
-
-static ssize_t ext_prop_type_show(struct usb_os_desc_ext_prop *ext_prop,
-				  char *page)
+static ssize_t ext_prop_type_show(struct config_item *item, char *page)
 {
-	return sprintf(page, "%d", ext_prop->type);
+	return sprintf(page, "%d", to_usb_os_desc_ext_prop(item)->type);
 }
 
-static ssize_t ext_prop_type_store(struct usb_os_desc_ext_prop *ext_prop,
+static ssize_t ext_prop_type_store(struct config_item *item,
 				   const char *page, size_t len)
 {
+	struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item);
 	struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent);
 	u8 type;
 	int ret;
@@ -997,9 +945,9 @@ end:
 	return ret;
 }
 
-static ssize_t ext_prop_data_show(struct usb_os_desc_ext_prop *ext_prop,
-				  char *page)
+static ssize_t ext_prop_data_show(struct config_item *item, char *page)
 {
+	struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item);
 	int len = ext_prop->data_len;
 
 	if (ext_prop->type == USB_EXT_PROP_UNICODE ||
@@ -1011,9 +959,10 @@ static ssize_t ext_prop_data_show(struct usb_os_desc_ext_prop *ext_prop,
 	return len;
 }
 
-static ssize_t ext_prop_data_store(struct usb_os_desc_ext_prop *ext_prop,
+static ssize_t ext_prop_data_store(struct config_item *item,
 				   const char *page, size_t len)
 {
+	struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item);
 	struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent);
 	char *new_data;
 	size_t ret_len = len;
@@ -1044,17 +993,12 @@ static ssize_t ext_prop_data_store(struct usb_os_desc_ext_prop *ext_prop,
 	return ret_len;
 }
 
-static struct usb_os_desc_ext_prop_attribute ext_prop_type =
-	__CONFIGFS_ATTR(type, S_IRUGO | S_IWUSR,
-			ext_prop_type_show, ext_prop_type_store);
-
-static struct usb_os_desc_ext_prop_attribute ext_prop_data =
-	__CONFIGFS_ATTR(data, S_IRUGO | S_IWUSR,
-			ext_prop_data_show, ext_prop_data_store);
+CONFIGFS_ATTR(ext_prop_, type);
+CONFIGFS_ATTR(ext_prop_, data);
 
 static struct configfs_attribute *ext_prop_attrs[] = {
-	&ext_prop_type.attr,
-	&ext_prop_data.attr,
+	&ext_prop_attr_type,
+	&ext_prop_attr_data,
 	NULL,
 };
 
@@ -1067,8 +1011,6 @@ static void usb_os_desc_ext_prop_release(struct config_item *item)
 
 static struct configfs_item_operations ext_prop_ops = {
 	.release		= usb_os_desc_ext_prop_release,
-	.show_attribute		= usb_os_desc_ext_prop_attr_show,
-	.store_attribute	= usb_os_desc_ext_prop_attr_store,
 };
 
 static struct config_item *ext_prop_make(
@@ -1137,21 +1079,17 @@ static struct configfs_group_operations interf_grp_ops = {
 	.drop_item	= &ext_prop_drop,
 };
 
-static struct configfs_item_operations interf_item_ops = {
-	.show_attribute		= usb_os_desc_attr_show,
-	.store_attribute	= usb_os_desc_attr_store,
-};
-
-static ssize_t interf_grp_compatible_id_show(struct usb_os_desc *desc,
+static ssize_t interf_grp_compatible_id_show(struct config_item *item,
 					     char *page)
 {
-	memcpy(page, desc->ext_compat_id, 8);
+	memcpy(page, to_usb_os_desc(item)->ext_compat_id, 8);
 	return 8;
 }
 
-static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc,
+static ssize_t interf_grp_compatible_id_store(struct config_item *item,
 					      const char *page, size_t len)
 {
+	struct usb_os_desc *desc = to_usb_os_desc(item);
 	int l;
 
 	l = min_t(int, 8, len);
@@ -1167,21 +1105,17 @@ static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc,
 	return len;
 }
 
-static struct usb_os_desc_attribute interf_grp_attr_compatible_id =
-	__CONFIGFS_ATTR(compatible_id, S_IRUGO | S_IWUSR,
-			interf_grp_compatible_id_show,
-			interf_grp_compatible_id_store);
-
-static ssize_t interf_grp_sub_compatible_id_show(struct usb_os_desc *desc,
+static ssize_t interf_grp_sub_compatible_id_show(struct config_item *item,
 						 char *page)
 {
-	memcpy(page, desc->ext_compat_id + 8, 8);
+	memcpy(page, to_usb_os_desc(item)->ext_compat_id + 8, 8);
 	return 8;
 }
 
-static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc,
+static ssize_t interf_grp_sub_compatible_id_store(struct config_item *item,
 						  const char *page, size_t len)
 {
+	struct usb_os_desc *desc = to_usb_os_desc(item);
 	int l;
 
 	l = min_t(int, 8, len);
@@ -1197,14 +1131,12 @@ static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc,
 	return len;
 }
 
-static struct usb_os_desc_attribute interf_grp_attr_sub_compatible_id =
-	__CONFIGFS_ATTR(sub_compatible_id, S_IRUGO | S_IWUSR,
-			interf_grp_sub_compatible_id_show,
-			interf_grp_sub_compatible_id_store);
+CONFIGFS_ATTR(interf_grp_, compatible_id);
+CONFIGFS_ATTR(interf_grp_, sub_compatible_id);
 
 static struct configfs_attribute *interf_grp_attrs[] = {
-	&interf_grp_attr_compatible_id.attr,
-	&interf_grp_attr_sub_compatible_id.attr,
+	&interf_grp_attr_compatible_id,
+	&interf_grp_attr_sub_compatible_id,
 	NULL
 };
 
@@ -1242,7 +1174,6 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent,
 	f_default_groups[0] = os_desc_group;
 
 	os_desc_group->default_groups = interface_groups;
-	interface_type->ct_item_ops = &interf_item_ops;
 	interface_type->ct_group_ops = &interf_grp_ops;
 	interface_type->ct_attrs = interf_grp_attrs;
 	interface_type->ct_owner = owner;
diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h
index d74c0ae989d5..c36e95730de1 100644
--- a/include/linux/usb/gadget_configfs.h
+++ b/include/linux/usb/gadget_configfs.h
@@ -7,9 +7,10 @@ int check_user_usb_string(const char *name,
 		struct usb_gadget_strings *stringtab_dev);
 
 #define GS_STRINGS_W(__struct, __name)	\
-	static ssize_t __struct##_##__name##_store(struct __struct *gs, \
+static ssize_t __struct##_##__name##_store(struct config_item *item, \
 		const char *page, size_t len)		\
 {							\
+	struct __struct *gs = to_##__struct(item);	\
 	int ret;					\
 							\
 	ret = usb_string_copy(page, &gs->__name);	\
@@ -19,30 +20,20 @@ int check_user_usb_string(const char *name,
 }
 
 #define GS_STRINGS_R(__struct, __name)	\
-	static ssize_t __struct##_##__name##_show(struct __struct *gs, \
-			char *page)	\
+static ssize_t __struct##_##__name##_show(struct config_item *item, char *page) \
 {	\
+	struct __struct *gs = to_##__struct(item);	\
 	return sprintf(page, "%s\n", gs->__name ?: "");	\
 }
 
-#define GS_STRING_ITEM_ATTR(struct_name, name)	\
-	static struct struct_name##_attribute struct_name##_##name = \
-		__CONFIGFS_ATTR(name,  S_IRUGO | S_IWUSR,		\
-				struct_name##_##name##_show,		\
-				struct_name##_##name##_store)
-
 #define GS_STRINGS_RW(struct_name, _name)	\
 	GS_STRINGS_R(struct_name, _name)	\
 	GS_STRINGS_W(struct_name, _name)	\
-	GS_STRING_ITEM_ATTR(struct_name, _name)
+	CONFIGFS_ATTR(struct_name##_, _name)
 
 #define USB_CONFIG_STRING_RW_OPS(struct_in)				\
-	CONFIGFS_ATTR_OPS(struct_in);					\
-									\
 static struct configfs_item_operations struct_in##_langid_item_ops = {	\
 	.release                = struct_in##_attr_release,		\
-	.show_attribute         = struct_in##_attr_show,		\
-	.store_attribute        = struct_in##_attr_store,		\
 };									\
 									\
 static struct config_item_type struct_in##_langid_type = {		\
-- 
cgit v1.2.3


From 517982229f78b2aebf00a8a337e84e8eeea70b8e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Oct 2015 15:32:59 +0200
Subject: configfs: remove old API

Remove the old show_attribute and store_attribute methods and update
the documentation.  Also replace the two C samples with a single new
one in the proper samples directory where people expect to find it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 Documentation/filesystems/Makefile                 |   2 -
 Documentation/filesystems/configfs/Makefile        |   3 -
 Documentation/filesystems/configfs/configfs.txt    |  38 +-
 .../configfs/configfs_example_explicit.c           | 483 ---------------------
 .../filesystems/configfs/configfs_example_macros.c | 446 -------------------
 fs/configfs/file.c                                 |  15 +-
 include/linux/configfs.h                           |  82 ----
 samples/Kconfig                                    |   6 +
 samples/Makefile                                   |   3 +-
 samples/configfs/Makefile                          |   2 +
 samples/configfs/configfs_sample.c                 | 404 +++++++++++++++++
 11 files changed, 428 insertions(+), 1056 deletions(-)
 delete mode 100644 Documentation/filesystems/configfs/Makefile
 delete mode 100644 Documentation/filesystems/configfs/configfs_example_explicit.c
 delete mode 100644 Documentation/filesystems/configfs/configfs_example_macros.c
 create mode 100644 samples/configfs/Makefile
 create mode 100644 samples/configfs/configfs_sample.c

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile
index 13483d192ebb..883010ce5e35 100644
--- a/Documentation/filesystems/Makefile
+++ b/Documentation/filesystems/Makefile
@@ -1,5 +1,3 @@
-subdir-y := configfs
-
 # List of programs to build
 hostprogs-y := dnotify_test
 
diff --git a/Documentation/filesystems/configfs/Makefile b/Documentation/filesystems/configfs/Makefile
deleted file mode 100644
index be7ec5e67dbc..000000000000
--- a/Documentation/filesystems/configfs/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-ifneq ($(CONFIG_CONFIGFS_FS),)
-obj-m += configfs_example_explicit.o configfs_example_macros.o
-endif
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index b40fec9d3f53..af68efdbbfad 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -160,12 +160,6 @@ among other things.  For that, it needs a type.
 
 	struct configfs_item_operations {
 		void (*release)(struct config_item *);
-		ssize_t (*show_attribute)(struct config_item *,
-					  struct configfs_attribute *,
-					  char *);
-		ssize_t (*store_attribute)(struct config_item *,
-					   struct configfs_attribute *,
-					   const char *, size_t);
 		int (*allow_link)(struct config_item *src,
 				  struct config_item *target);
 		int (*drop_link)(struct config_item *src,
@@ -183,9 +177,7 @@ The most basic function of a config_item_type is to define what
 operations can be performed on a config_item.  All items that have been
 allocated dynamically will need to provide the ct_item_ops->release()
 method.  This method is called when the config_item's reference count
-reaches zero.  Items that wish to display an attribute need to provide
-the ct_item_ops->show_attribute() method.  Similarly, storing a new
-attribute value uses the store_attribute() method.
+reaches zero.
 
 [struct configfs_attribute]
 
@@ -193,6 +185,8 @@ attribute value uses the store_attribute() method.
 		char                    *ca_name;
 		struct module           *ca_owner;
 		umode_t                  ca_mode;
+		ssize_t (*show)(struct config_item *, char *);
+		ssize_t (*store)(struct config_item *, const char *, size_t);
 	};
 
 When a config_item wants an attribute to appear as a file in the item's
@@ -202,10 +196,10 @@ config_item_type->ct_attrs.  When the item appears in configfs, the
 attribute file will appear with the configfs_attribute->ca_name
 filename.  configfs_attribute->ca_mode specifies the file permissions.
 
-If an attribute is readable and the config_item provides a
-ct_item_ops->show_attribute() method, that method will be called
-whenever userspace asks for a read(2) on the attribute.  The converse
-will happen for write(2).
+If an attribute is readable and provides a ->show method, that method will
+be called whenever userspace asks for a read(2) on the attribute.  If an
+attribute is writable and provides a ->store method, that method will be
+called whenever userspace asks for a write(2) on the attribute.
 
 [struct config_group]
 
@@ -311,20 +305,10 @@ the subsystem must be ready for it.
 [An Example]
 
 The best example of these basic concepts is the simple_children
-subsystem/group and the simple_child item in configfs_example_explicit.c
-and configfs_example_macros.c.  It shows a trivial object displaying and
-storing an attribute, and a simple group creating and destroying these
-children.
-
-The only difference between configfs_example_explicit.c and
-configfs_example_macros.c is how the attributes of the childless item
-are defined.  The childless item has extended attributes, each with
-their own show()/store() operation.  This follows a convention commonly
-used in sysfs.  configfs_example_explicit.c creates these attributes
-by explicitly defining the structures involved.  Conversely
-configfs_example_macros.c uses some convenience macros from configfs.h
-to define the attributes.  These macros are similar to their sysfs
-counterparts.
+subsystem/group and the simple_child item in
+samples/configfs/configfs_sample.c. It shows a trivial object displaying
+and storing an attribute, and a simple group creating and destroying
+these children.
 
 [Hierarchy Navigation and the Subsystem Mutex]
 
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c
deleted file mode 100644
index 1420233dfa55..000000000000
--- a/Documentation/filesystems/configfs/configfs_example_explicit.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * vim: noexpandtab ts=8 sts=0 sw=8:
- *
- * configfs_example_explicit.c - This file is a demonstration module
- *      containing a number of configfs subsystems.  It explicitly defines
- *      each structure without using the helper macros defined in
- *      configfs.h.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Based on sysfs:
- * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
- *
- * configfs Copyright (C) 2005 Oracle.  All rights reserved.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/configfs.h>
-
-
-
-/*
- * 01-childless
- *
- * This first example is a childless subsystem.  It cannot create
- * any config_items.  It just has attributes.
- *
- * Note that we are enclosing the configfs_subsystem inside a container.
- * This is not necessary if a subsystem has no attributes directly
- * on the subsystem.  See the next example, 02-simple-children, for
- * such a subsystem.
- */
-
-struct childless {
-	struct configfs_subsystem subsys;
-	int showme;
-	int storeme;
-};
-
-struct childless_attribute {
-	struct configfs_attribute attr;
-	ssize_t (*show)(struct childless *, char *);
-	ssize_t (*store)(struct childless *, const char *, size_t);
-};
-
-static inline struct childless *to_childless(struct config_item *item)
-{
-	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
-}
-
-static ssize_t childless_showme_read(struct childless *childless,
-				     char *page)
-{
-	ssize_t pos;
-
-	pos = sprintf(page, "%d\n", childless->showme);
-	childless->showme++;
-
-	return pos;
-}
-
-static ssize_t childless_storeme_read(struct childless *childless,
-				      char *page)
-{
-	return sprintf(page, "%d\n", childless->storeme);
-}
-
-static ssize_t childless_storeme_write(struct childless *childless,
-				       const char *page,
-				       size_t count)
-{
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if ((*p != '\0') && (*p != '\n'))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	childless->storeme = tmp;
-
-	return count;
-}
-
-static ssize_t childless_description_read(struct childless *childless,
-					  char *page)
-{
-	return sprintf(page,
-"[01-childless]\n"
-"\n"
-"The childless subsystem is the simplest possible subsystem in\n"
-"configfs.  It does not support the creation of child config_items.\n"
-"It only has a few attributes.  In fact, it isn't much different\n"
-"than a directory in /proc.\n");
-}
-
-static struct childless_attribute childless_attr_showme = {
-	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO },
-	.show	= childless_showme_read,
-};
-static struct childless_attribute childless_attr_storeme = {
-	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR },
-	.show	= childless_storeme_read,
-	.store	= childless_storeme_write,
-};
-static struct childless_attribute childless_attr_description = {
-	.attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO },
-	.show = childless_description_read,
-};
-
-static struct configfs_attribute *childless_attrs[] = {
-	&childless_attr_showme.attr,
-	&childless_attr_storeme.attr,
-	&childless_attr_description.attr,
-	NULL,
-};
-
-static ssize_t childless_attr_show(struct config_item *item,
-				   struct configfs_attribute *attr,
-				   char *page)
-{
-	struct childless *childless = to_childless(item);
-	struct childless_attribute *childless_attr =
-		container_of(attr, struct childless_attribute, attr);
-	ssize_t ret = 0;
-
-	if (childless_attr->show)
-		ret = childless_attr->show(childless, page);
-	return ret;
-}
-
-static ssize_t childless_attr_store(struct config_item *item,
-				    struct configfs_attribute *attr,
-				    const char *page, size_t count)
-{
-	struct childless *childless = to_childless(item);
-	struct childless_attribute *childless_attr =
-		container_of(attr, struct childless_attribute, attr);
-	ssize_t ret = -EINVAL;
-
-	if (childless_attr->store)
-		ret = childless_attr->store(childless, page, count);
-	return ret;
-}
-
-static struct configfs_item_operations childless_item_ops = {
-	.show_attribute		= childless_attr_show,
-	.store_attribute	= childless_attr_store,
-};
-
-static struct config_item_type childless_type = {
-	.ct_item_ops	= &childless_item_ops,
-	.ct_attrs	= childless_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct childless childless_subsys = {
-	.subsys = {
-		.su_group = {
-			.cg_item = {
-				.ci_namebuf = "01-childless",
-				.ci_type = &childless_type,
-			},
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 02-simple-children
- *
- * This example merely has a simple one-attribute child.  Note that
- * there is no extra attribute structure, as the child's attribute is
- * known from the get-go.  Also, there is no container for the
- * subsystem, as it has no attributes of its own.
- */
-
-struct simple_child {
-	struct config_item item;
-	int storeme;
-};
-
-static inline struct simple_child *to_simple_child(struct config_item *item)
-{
-	return item ? container_of(item, struct simple_child, item) : NULL;
-}
-
-static struct configfs_attribute simple_child_attr_storeme = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "storeme",
-	.ca_mode = S_IRUGO | S_IWUSR,
-};
-
-static struct configfs_attribute *simple_child_attrs[] = {
-	&simple_child_attr_storeme,
-	NULL,
-};
-
-static ssize_t simple_child_attr_show(struct config_item *item,
-				      struct configfs_attribute *attr,
-				      char *page)
-{
-	ssize_t count;
-	struct simple_child *simple_child = to_simple_child(item);
-
-	count = sprintf(page, "%d\n", simple_child->storeme);
-
-	return count;
-}
-
-static ssize_t simple_child_attr_store(struct config_item *item,
-				       struct configfs_attribute *attr,
-				       const char *page, size_t count)
-{
-	struct simple_child *simple_child = to_simple_child(item);
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	simple_child->storeme = tmp;
-
-	return count;
-}
-
-static void simple_child_release(struct config_item *item)
-{
-	kfree(to_simple_child(item));
-}
-
-static struct configfs_item_operations simple_child_item_ops = {
-	.release		= simple_child_release,
-	.show_attribute		= simple_child_attr_show,
-	.store_attribute	= simple_child_attr_store,
-};
-
-static struct config_item_type simple_child_type = {
-	.ct_item_ops	= &simple_child_item_ops,
-	.ct_attrs	= simple_child_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-
-struct simple_children {
-	struct config_group group;
-};
-
-static inline struct simple_children *to_simple_children(struct config_item *item)
-{
-	return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
-}
-
-static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
-{
-	struct simple_child *simple_child;
-
-	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
-	if (!simple_child)
-		return ERR_PTR(-ENOMEM);
-
-	config_item_init_type_name(&simple_child->item, name,
-				   &simple_child_type);
-
-	simple_child->storeme = 0;
-
-	return &simple_child->item;
-}
-
-static struct configfs_attribute simple_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *simple_children_attrs[] = {
-	&simple_children_attr_description,
-	NULL,
-};
-
-static ssize_t simple_children_attr_show(struct config_item *item,
-					 struct configfs_attribute *attr,
-					 char *page)
-{
-	return sprintf(page,
-"[02-simple-children]\n"
-"\n"
-"This subsystem allows the creation of child config_items.  These\n"
-"items have only one attribute that is readable and writeable.\n");
-}
-
-static void simple_children_release(struct config_item *item)
-{
-	kfree(to_simple_children(item));
-}
-
-static struct configfs_item_operations simple_children_item_ops = {
-	.release	= simple_children_release,
-	.show_attribute	= simple_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations simple_children_group_ops = {
-	.make_item	= simple_children_make_item,
-};
-
-static struct config_item_type simple_children_type = {
-	.ct_item_ops	= &simple_children_item_ops,
-	.ct_group_ops	= &simple_children_group_ops,
-	.ct_attrs	= simple_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem simple_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "02-simple-children",
-			.ci_type = &simple_children_type,
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 03-group-children
- *
- * This example reuses the simple_children group from above.  However,
- * the simple_children group is not the subsystem itself, it is a
- * child of the subsystem.  Creation of a group in the subsystem creates
- * a new simple_children group.  That group can then have simple_child
- * children of its own.
- */
-
-static struct config_group *group_children_make_group(struct config_group *group, const char *name)
-{
-	struct simple_children *simple_children;
-
-	simple_children = kzalloc(sizeof(struct simple_children),
-				  GFP_KERNEL);
-	if (!simple_children)
-		return ERR_PTR(-ENOMEM);
-
-	config_group_init_type_name(&simple_children->group, name,
-				    &simple_children_type);
-
-	return &simple_children->group;
-}
-
-static struct configfs_attribute group_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *group_children_attrs[] = {
-	&group_children_attr_description,
-	NULL,
-};
-
-static ssize_t group_children_attr_show(struct config_item *item,
-					struct configfs_attribute *attr,
-					char *page)
-{
-	return sprintf(page,
-"[03-group-children]\n"
-"\n"
-"This subsystem allows the creation of child config_groups.  These\n"
-"groups are like the subsystem simple-children.\n");
-}
-
-static struct configfs_item_operations group_children_item_ops = {
-	.show_attribute	= group_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations group_children_group_ops = {
-	.make_group	= group_children_make_group,
-};
-
-static struct config_item_type group_children_type = {
-	.ct_item_ops	= &group_children_item_ops,
-	.ct_group_ops	= &group_children_group_ops,
-	.ct_attrs	= group_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem group_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "03-group-children",
-			.ci_type = &group_children_type,
-		},
-	},
-};
-
-/* ----------------------------------------------------------------- */
-
-/*
- * We're now done with our subsystem definitions.
- * For convenience in this module, here's a list of them all.  It
- * allows the init function to easily register them.  Most modules
- * will only have one subsystem, and will only call register_subsystem
- * on it directly.
- */
-static struct configfs_subsystem *example_subsys[] = {
-	&childless_subsys.subsys,
-	&simple_children_subsys,
-	&group_children_subsys,
-	NULL,
-};
-
-static int __init configfs_example_init(void)
-{
-	int ret;
-	int i;
-	struct configfs_subsystem *subsys;
-
-	for (i = 0; example_subsys[i]; i++) {
-		subsys = example_subsys[i];
-
-		config_group_init(&subsys->su_group);
-		mutex_init(&subsys->su_mutex);
-		ret = configfs_register_subsystem(subsys);
-		if (ret) {
-			printk(KERN_ERR "Error %d while registering subsystem %s\n",
-			       ret,
-			       subsys->su_group.cg_item.ci_namebuf);
-			goto out_unregister;
-		}
-	}
-
-	return 0;
-
-out_unregister:
-	for (i--; i >= 0; i--)
-		configfs_unregister_subsystem(example_subsys[i]);
-
-	return ret;
-}
-
-static void __exit configfs_example_exit(void)
-{
-	int i;
-
-	for (i = 0; example_subsys[i]; i++)
-		configfs_unregister_subsystem(example_subsys[i]);
-}
-
-module_init(configfs_example_init);
-module_exit(configfs_example_exit);
-MODULE_LICENSE("GPL");
diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c
deleted file mode 100644
index 327dfbc640a9..000000000000
--- a/Documentation/filesystems/configfs/configfs_example_macros.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * vim: noexpandtab ts=8 sts=0 sw=8:
- *
- * configfs_example_macros.c - This file is a demonstration module
- *      containing a number of configfs subsystems.  It uses the helper
- *      macros defined by configfs.h
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Based on sysfs:
- * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
- *
- * configfs Copyright (C) 2005 Oracle.  All rights reserved.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/configfs.h>
-
-
-
-/*
- * 01-childless
- *
- * This first example is a childless subsystem.  It cannot create
- * any config_items.  It just has attributes.
- *
- * Note that we are enclosing the configfs_subsystem inside a container.
- * This is not necessary if a subsystem has no attributes directly
- * on the subsystem.  See the next example, 02-simple-children, for
- * such a subsystem.
- */
-
-struct childless {
-	struct configfs_subsystem subsys;
-	int showme;
-	int storeme;
-};
-
-static inline struct childless *to_childless(struct config_item *item)
-{
-	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
-}
-
-CONFIGFS_ATTR_STRUCT(childless);
-#define CHILDLESS_ATTR(_name, _mode, _show, _store)	\
-struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store)
-#define CHILDLESS_ATTR_RO(_name, _show)	\
-struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show);
-
-static ssize_t childless_showme_read(struct childless *childless,
-				     char *page)
-{
-	ssize_t pos;
-
-	pos = sprintf(page, "%d\n", childless->showme);
-	childless->showme++;
-
-	return pos;
-}
-
-static ssize_t childless_storeme_read(struct childless *childless,
-				      char *page)
-{
-	return sprintf(page, "%d\n", childless->storeme);
-}
-
-static ssize_t childless_storeme_write(struct childless *childless,
-				       const char *page,
-				       size_t count)
-{
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	childless->storeme = tmp;
-
-	return count;
-}
-
-static ssize_t childless_description_read(struct childless *childless,
-					  char *page)
-{
-	return sprintf(page,
-"[01-childless]\n"
-"\n"
-"The childless subsystem is the simplest possible subsystem in\n"
-"configfs.  It does not support the creation of child config_items.\n"
-"It only has a few attributes.  In fact, it isn't much different\n"
-"than a directory in /proc.\n");
-}
-
-CHILDLESS_ATTR_RO(showme, childless_showme_read);
-CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read,
-	       childless_storeme_write);
-CHILDLESS_ATTR_RO(description, childless_description_read);
-
-static struct configfs_attribute *childless_attrs[] = {
-	&childless_attr_showme.attr,
-	&childless_attr_storeme.attr,
-	&childless_attr_description.attr,
-	NULL,
-};
-
-CONFIGFS_ATTR_OPS(childless);
-static struct configfs_item_operations childless_item_ops = {
-	.show_attribute		= childless_attr_show,
-	.store_attribute	= childless_attr_store,
-};
-
-static struct config_item_type childless_type = {
-	.ct_item_ops	= &childless_item_ops,
-	.ct_attrs	= childless_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct childless childless_subsys = {
-	.subsys = {
-		.su_group = {
-			.cg_item = {
-				.ci_namebuf = "01-childless",
-				.ci_type = &childless_type,
-			},
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 02-simple-children
- *
- * This example merely has a simple one-attribute child.  Note that
- * there is no extra attribute structure, as the child's attribute is
- * known from the get-go.  Also, there is no container for the
- * subsystem, as it has no attributes of its own.
- */
-
-struct simple_child {
-	struct config_item item;
-	int storeme;
-};
-
-static inline struct simple_child *to_simple_child(struct config_item *item)
-{
-	return item ? container_of(item, struct simple_child, item) : NULL;
-}
-
-static struct configfs_attribute simple_child_attr_storeme = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "storeme",
-	.ca_mode = S_IRUGO | S_IWUSR,
-};
-
-static struct configfs_attribute *simple_child_attrs[] = {
-	&simple_child_attr_storeme,
-	NULL,
-};
-
-static ssize_t simple_child_attr_show(struct config_item *item,
-				      struct configfs_attribute *attr,
-				      char *page)
-{
-	ssize_t count;
-	struct simple_child *simple_child = to_simple_child(item);
-
-	count = sprintf(page, "%d\n", simple_child->storeme);
-
-	return count;
-}
-
-static ssize_t simple_child_attr_store(struct config_item *item,
-				       struct configfs_attribute *attr,
-				       const char *page, size_t count)
-{
-	struct simple_child *simple_child = to_simple_child(item);
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	simple_child->storeme = tmp;
-
-	return count;
-}
-
-static void simple_child_release(struct config_item *item)
-{
-	kfree(to_simple_child(item));
-}
-
-static struct configfs_item_operations simple_child_item_ops = {
-	.release		= simple_child_release,
-	.show_attribute		= simple_child_attr_show,
-	.store_attribute	= simple_child_attr_store,
-};
-
-static struct config_item_type simple_child_type = {
-	.ct_item_ops	= &simple_child_item_ops,
-	.ct_attrs	= simple_child_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-
-struct simple_children {
-	struct config_group group;
-};
-
-static inline struct simple_children *to_simple_children(struct config_item *item)
-{
-	return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
-}
-
-static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
-{
-	struct simple_child *simple_child;
-
-	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
-	if (!simple_child)
-		return ERR_PTR(-ENOMEM);
-
-	config_item_init_type_name(&simple_child->item, name,
-				   &simple_child_type);
-
-	simple_child->storeme = 0;
-
-	return &simple_child->item;
-}
-
-static struct configfs_attribute simple_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *simple_children_attrs[] = {
-	&simple_children_attr_description,
-	NULL,
-};
-
-static ssize_t simple_children_attr_show(struct config_item *item,
-					 struct configfs_attribute *attr,
-					 char *page)
-{
-	return sprintf(page,
-"[02-simple-children]\n"
-"\n"
-"This subsystem allows the creation of child config_items.  These\n"
-"items have only one attribute that is readable and writeable.\n");
-}
-
-static void simple_children_release(struct config_item *item)
-{
-	kfree(to_simple_children(item));
-}
-
-static struct configfs_item_operations simple_children_item_ops = {
-	.release	= simple_children_release,
-	.show_attribute	= simple_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations simple_children_group_ops = {
-	.make_item	= simple_children_make_item,
-};
-
-static struct config_item_type simple_children_type = {
-	.ct_item_ops	= &simple_children_item_ops,
-	.ct_group_ops	= &simple_children_group_ops,
-	.ct_attrs	= simple_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem simple_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "02-simple-children",
-			.ci_type = &simple_children_type,
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 03-group-children
- *
- * This example reuses the simple_children group from above.  However,
- * the simple_children group is not the subsystem itself, it is a
- * child of the subsystem.  Creation of a group in the subsystem creates
- * a new simple_children group.  That group can then have simple_child
- * children of its own.
- */
-
-static struct config_group *group_children_make_group(struct config_group *group, const char *name)
-{
-	struct simple_children *simple_children;
-
-	simple_children = kzalloc(sizeof(struct simple_children),
-				  GFP_KERNEL);
-	if (!simple_children)
-		return ERR_PTR(-ENOMEM);
-
-	config_group_init_type_name(&simple_children->group, name,
-				    &simple_children_type);
-
-	return &simple_children->group;
-}
-
-static struct configfs_attribute group_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *group_children_attrs[] = {
-	&group_children_attr_description,
-	NULL,
-};
-
-static ssize_t group_children_attr_show(struct config_item *item,
-					struct configfs_attribute *attr,
-					char *page)
-{
-	return sprintf(page,
-"[03-group-children]\n"
-"\n"
-"This subsystem allows the creation of child config_groups.  These\n"
-"groups are like the subsystem simple-children.\n");
-}
-
-static struct configfs_item_operations group_children_item_ops = {
-	.show_attribute	= group_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations group_children_group_ops = {
-	.make_group	= group_children_make_group,
-};
-
-static struct config_item_type group_children_type = {
-	.ct_item_ops	= &group_children_item_ops,
-	.ct_group_ops	= &group_children_group_ops,
-	.ct_attrs	= group_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem group_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "03-group-children",
-			.ci_type = &group_children_type,
-		},
-	},
-};
-
-/* ----------------------------------------------------------------- */
-
-/*
- * We're now done with our subsystem definitions.
- * For convenience in this module, here's a list of them all.  It
- * allows the init function to easily register them.  Most modules
- * will only have one subsystem, and will only call register_subsystem
- * on it directly.
- */
-static struct configfs_subsystem *example_subsys[] = {
-	&childless_subsys.subsys,
-	&simple_children_subsys,
-	&group_children_subsys,
-	NULL,
-};
-
-static int __init configfs_example_init(void)
-{
-	int ret;
-	int i;
-	struct configfs_subsystem *subsys;
-
-	for (i = 0; example_subsys[i]; i++) {
-		subsys = example_subsys[i];
-
-		config_group_init(&subsys->su_group);
-		mutex_init(&subsys->su_mutex);
-		ret = configfs_register_subsystem(subsys);
-		if (ret) {
-			printk(KERN_ERR "Error %d while registering subsystem %s\n",
-			       ret,
-			       subsys->su_group.cg_item.ci_namebuf);
-			goto out_unregister;
-		}
-	}
-
-	return 0;
-
-out_unregister:
-	for (i--; i >= 0; i--)
-		configfs_unregister_subsystem(example_subsys[i]);
-
-	return ret;
-}
-
-static void __exit configfs_example_exit(void)
-{
-	int i;
-
-	for (i = 0; example_subsys[i]; i++)
-		configfs_unregister_subsystem(example_subsys[i]);
-}
-
-module_init(configfs_example_init);
-module_exit(configfs_example_exit);
-MODULE_LICENSE("GPL");
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 106ca589e90a..d39099ea7df7 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -65,7 +65,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
 {
 	struct configfs_attribute * attr = to_attr(dentry);
 	struct config_item * item = to_item(dentry->d_parent);
-	struct configfs_item_operations * ops = buffer->ops;
 	int ret = 0;
 	ssize_t count;
 
@@ -74,10 +73,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
 	if (!buffer->page)
 		return -ENOMEM;
 
-	if (ops->show_attribute)
-		count = ops->show_attribute(item, attr, buffer->page);
-	else
-		count = attr->show(item, buffer->page);
+	count = attr->show(item, buffer->page);
 
 	buffer->needs_read_fill = 0;
 	BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
@@ -175,10 +171,7 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size
 {
 	struct configfs_attribute * attr = to_attr(dentry);
 	struct config_item * item = to_item(dentry->d_parent);
-	struct configfs_item_operations * ops = buffer->ops;
 
-	if (ops->store_attribute)
-		return ops->store_attribute(item, attr, buffer->page, count);
 	return attr->store(item, buffer->page, count);
 }
 
@@ -243,8 +236,7 @@ static int check_perm(struct inode * inode, struct file * file)
 	 * and we must have a store method.
 	 */
 	if (file->f_mode & FMODE_WRITE) {
-		if (!(inode->i_mode & S_IWUGO) ||
-		    (!ops->store_attribute && !attr->store))
+		if (!(inode->i_mode & S_IWUGO) || !attr->store)
 			goto Eaccess;
 
 	}
@@ -254,8 +246,7 @@ static int check_perm(struct inode * inode, struct file * file)
 	 * must be a show method for it.
 	 */
 	if (file->f_mode & FMODE_READ) {
-		if (!(inode->i_mode & S_IRUGO) ||
-		    (!ops->show_attribute && !attr->show))
+		if (!(inode->i_mode & S_IRUGO) || !attr->show)
 			goto Eaccess;
 	}
 
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 85e9956a86de..a8a335b7fce0 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -154,86 +154,6 @@ static struct configfs_attribute _pfx##attr_##_name = {	\
 	.store		= _pfx##_name##_store,		\
 }
 
-/*
- * Users often need to create attribute structures for their configurable
- * attributes, containing a configfs_attribute member and function pointers
- * for the show() and store() operations on that attribute. If they don't
- * need anything else on the extended attribute structure, they can use
- * this macro to define it  The argument _item is the name of the
- * config_item structure.
- */
-#define CONFIGFS_ATTR_STRUCT(_item)					\
-struct _item##_attribute {						\
-	struct configfs_attribute attr;					\
-	ssize_t (*show)(struct _item *, char *);			\
-	ssize_t (*store)(struct _item *, const char *, size_t);		\
-}
-
-/*
- * With the extended attribute structure, users can use this macro
- * (similar to sysfs' __ATTR) to make defining attributes easier.
- * An example:
- * #define MYITEM_ATTR(_name, _mode, _show, _store)	\
- * struct myitem_attribute childless_attr_##_name =	\
- *         __CONFIGFS_ATTR(_name, _mode, _show, _store)
- */
-#define __CONFIGFS_ATTR(_name, _mode, _show, _store)			\
-{									\
-	.attr	= {							\
-			.ca_name = __stringify(_name),			\
-			.ca_mode = _mode,				\
-			.ca_owner = THIS_MODULE,			\
-	},								\
-	.show	= _show,						\
-	.store	= _store,						\
-}
-/* Here is a readonly version, only requiring a show() operation */
-#define __CONFIGFS_ATTR_RO(_name, _show)				\
-{									\
-	.attr	= {							\
-			.ca_name = __stringify(_name),			\
-			.ca_mode = 0444,				\
-			.ca_owner = THIS_MODULE,			\
-	},								\
-	.show	= _show,						\
-}
-
-/*
- * With these extended attributes, the simple show_attribute() and
- * store_attribute() operations need to call the show() and store() of the
- * attributes.  This is a common pattern, so we provide a macro to define
- * them.  The argument _item is the name of the config_item structure.
- * This macro expects the attributes to be named "struct <name>_attribute"
- * and the function to_<name>() to exist;
- */
-#define CONFIGFS_ATTR_OPS(_item)					\
-static ssize_t _item##_attr_show(struct config_item *item,		\
-				 struct configfs_attribute *attr,	\
-				 char *page)				\
-{									\
-	struct _item *_item = to_##_item(item);				\
-	struct _item##_attribute *_item##_attr =			\
-		container_of(attr, struct _item##_attribute, attr);	\
-	ssize_t ret = 0;						\
-									\
-	if (_item##_attr->show)						\
-		ret = _item##_attr->show(_item, page);			\
-	return ret;							\
-}									\
-static ssize_t _item##_attr_store(struct config_item *item,		\
-				  struct configfs_attribute *attr,	\
-				  const char *page, size_t count)	\
-{									\
-	struct _item *_item = to_##_item(item);				\
-	struct _item##_attribute *_item##_attr =			\
-		container_of(attr, struct _item##_attribute, attr);	\
-	ssize_t ret = -EINVAL;						\
-									\
-	if (_item##_attr->store)					\
-		ret = _item##_attr->store(_item, page, count);		\
-	return ret;							\
-}
-
 /*
  * If allow_link() exists, the item can symlink(2) out to other
  * items.  If the item is a group, it may support mkdir(2).
@@ -250,8 +170,6 @@ static ssize_t _item##_attr_store(struct config_item *item,		\
  */
 struct configfs_item_operations {
 	void (*release)(struct config_item *);
-	ssize_t	(*show_attribute)(struct config_item *, struct configfs_attribute *,char *);
-	ssize_t	(*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t);
 	int (*allow_link)(struct config_item *src, struct config_item *target);
 	int (*drop_link)(struct config_item *src, struct config_item *target);
 };
diff --git a/samples/Kconfig b/samples/Kconfig
index 224ebb46bed5..d54f28c6dc5e 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -70,4 +70,10 @@ config SAMPLE_LIVEPATCH
 	  Builds a sample live patch that replaces the procfs handler
 	  for /proc/cmdline to print "this has been live patched".
 
+config SAMPLE_CONFIGFS
+	tristate "Build configfs sample -- loadable modules only"
+	depends on CONFIGFS_FS && m
+	help
+	  Builds a sample configfs interface.
+
 endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index f00257bcc5a7..48001d7e23f0 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,5 @@
 # Makefile for Linux samples code
 
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
-			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
+			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
+			   configfs/
diff --git a/samples/configfs/Makefile b/samples/configfs/Makefile
new file mode 100644
index 000000000000..a9afd99630fc
--- /dev/null
+++ b/samples/configfs/Makefile
@@ -0,0 +1,2 @@
+
+obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs_sample.o
diff --git a/samples/configfs/configfs_sample.c b/samples/configfs/configfs_sample.c
new file mode 100644
index 000000000000..1ea33119e532
--- /dev/null
+++ b/samples/configfs/configfs_sample.c
@@ -0,0 +1,404 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_macros.c - This file is a demonstration module
+ *      containing a number of configfs subsystems.  It uses the helper
+ *      macros defined by configfs.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem.  It cannot create
+ * any config_items.  It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem.  See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+	struct configfs_subsystem subsys;
+	int showme;
+	int storeme;
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+	return item ? container_of(to_configfs_subsystem(to_config_group(item)),
+			struct childless, subsys) : NULL;
+}
+
+static ssize_t childless_showme_show(struct config_item *item, char *page)
+{
+	struct childless *childless = to_childless(item);
+	ssize_t pos;
+
+	pos = sprintf(page, "%d\n", childless->showme);
+	childless->showme++;
+
+	return pos;
+}
+
+static ssize_t childless_storeme_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%d\n", to_childless(item)->storeme);
+}
+
+static ssize_t childless_storeme_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct childless *childless = to_childless(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	childless->storeme = tmp;
+
+	return count;
+}
+
+static ssize_t childless_description_show(struct config_item *item, char *page)
+{
+	return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs.  It does not support the creation of child config_items.\n"
+"It only has a few attributes.  In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+CONFIGFS_ATTR_RO(childless_, showme);
+CONFIGFS_ATTR(childless_, storeme);
+CONFIGFS_ATTR_RO(childless_, description);
+
+static struct configfs_attribute *childless_attrs[] = {
+	&childless_attr_showme,
+	&childless_attr_storeme,
+	&childless_attr_description,
+	NULL,
+};
+
+static struct config_item_type childless_type = {
+	.ct_attrs	= childless_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+	.subsys = {
+		.su_group = {
+			.cg_item = {
+				.ci_namebuf = "01-childless",
+				.ci_type = &childless_type,
+			},
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child.  Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go.  Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+	struct config_item item;
+	int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+	return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static ssize_t simple_child_storeme_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%d\n", to_simple_child(item)->storeme);
+}
+
+static ssize_t simple_child_storeme_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct simple_child *simple_child = to_simple_child(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	simple_child->storeme = tmp;
+
+	return count;
+}
+
+CONFIGFS_ATTR(simple_child_, storeme);
+
+static struct configfs_attribute *simple_child_attrs[] = {
+	&simple_child_attr_storeme,
+	NULL,
+};
+
+static void simple_child_release(struct config_item *item)
+{
+	kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+	.release		= simple_child_release,
+};
+
+static struct config_item_type simple_child_type = {
+	.ct_item_ops	= &simple_child_item_ops,
+	.ct_attrs	= simple_child_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+
+struct simple_children {
+	struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item),
+			struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group,
+		const char *name)
+{
+	struct simple_child *simple_child;
+
+	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+	if (!simple_child)
+		return ERR_PTR(-ENOMEM);
+
+	config_item_init_type_name(&simple_child->item, name,
+				   &simple_child_type);
+
+	simple_child->storeme = 0;
+
+	return &simple_child->item;
+}
+
+static ssize_t simple_children_description_show(struct config_item *item,
+		char *page)
+{
+	return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items.  These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+CONFIGFS_ATTR_RO(simple_children_, description);
+
+static struct configfs_attribute *simple_children_attrs[] = {
+	&simple_children_attr_description,
+	NULL,
+};
+
+static void simple_children_release(struct config_item *item)
+{
+	kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+	.release	= simple_children_release,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+	.make_item	= simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+	.ct_item_ops	= &simple_children_item_ops,
+	.ct_group_ops	= &simple_children_group_ops,
+	.ct_attrs	= simple_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "02-simple-children",
+			.ci_type = &simple_children_type,
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above.  However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem.  Creation of a group in the subsystem creates
+ * a new simple_children group.  That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(
+		struct config_group *group, const char *name)
+{
+	struct simple_children *simple_children;
+
+	simple_children = kzalloc(sizeof(struct simple_children),
+				  GFP_KERNEL);
+	if (!simple_children)
+		return ERR_PTR(-ENOMEM);
+
+	config_group_init_type_name(&simple_children->group, name,
+				    &simple_children_type);
+
+	return &simple_children->group;
+}
+
+static ssize_t group_children_description_show(struct config_item *item,
+		char *page)
+{
+	return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups.  These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+CONFIGFS_ATTR_RO(group_children_, description);
+
+static struct configfs_attribute *group_children_attrs[] = {
+	&group_children_attr_description,
+	NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+	.make_group	= group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+	.ct_group_ops	= &group_children_group_ops,
+	.ct_attrs	= group_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "03-group-children",
+			.ci_type = &group_children_type,
+		},
+	},
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all.  It
+ * allows the init function to easily register them.  Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+	&childless_subsys.subsys,
+	&simple_children_subsys,
+	&group_children_subsys,
+	NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+	int ret;
+	int i;
+	struct configfs_subsystem *subsys;
+
+	for (i = 0; example_subsys[i]; i++) {
+		subsys = example_subsys[i];
+
+		config_group_init(&subsys->su_group);
+		mutex_init(&subsys->su_mutex);
+		ret = configfs_register_subsystem(subsys);
+		if (ret) {
+			printk(KERN_ERR "Error %d while registering subsystem %s\n",
+			       ret,
+			       subsys->su_group.cg_item.ci_namebuf);
+			goto out_unregister;
+		}
+	}
+
+	return 0;
+
+out_unregister:
+	for (i--; i >= 0; i--)
+		configfs_unregister_subsystem(example_subsys[i]);
+
+	return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+	int i;
+
+	for (i = 0; example_subsys[i]; i++)
+		configfs_unregister_subsystem(example_subsys[i]);
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From d28c2b36d6027702585ca93773b3edd6e5f1a5bd Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Oct 2015 14:42:44 +0300
Subject: ARM: common: edma: Remove unused functions

We no longer have users for these functions so they can be removed.
Remove also unused enums from the header file.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/common/edma.c             | 376 -------------------------------------
 include/linux/platform_data/edma.h |  33 ----
 2 files changed, 409 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 56fc339571f9..e9c4cb16a47e 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -510,62 +510,6 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int reserve_contiguous_slots(int ctlr, unsigned int id,
-				     unsigned int num_slots,
-				     unsigned int start_slot)
-{
-	int i, j;
-	unsigned int count = num_slots;
-	int stop_slot = start_slot;
-	DECLARE_BITMAP(tmp_inuse, EDMA_MAX_PARAMENTRY);
-
-	for (i = start_slot; i < edma_cc[ctlr]->num_slots; ++i) {
-		j = EDMA_CHAN_SLOT(i);
-		if (!test_and_set_bit(j, edma_cc[ctlr]->edma_inuse)) {
-			/* Record our current beginning slot */
-			if (count == num_slots)
-				stop_slot = i;
-
-			count--;
-			set_bit(j, tmp_inuse);
-
-			if (count == 0)
-				break;
-		} else {
-			clear_bit(j, tmp_inuse);
-
-			if (id == EDMA_CONT_PARAMS_FIXED_EXACT) {
-				stop_slot = i;
-				break;
-			} else {
-				count = num_slots;
-			}
-		}
-	}
-
-	/*
-	 * We have to clear any bits that we set
-	 * if we run out parameter RAM slots, i.e we do find a set
-	 * of contiguous parameter RAM slots but do not find the exact number
-	 * requested as we may reach the total number of parameter RAM slots
-	 */
-	if (i == edma_cc[ctlr]->num_slots)
-		stop_slot = i;
-
-	j = start_slot;
-	for_each_set_bit_from(j, tmp_inuse, stop_slot)
-		clear_bit(j, edma_cc[ctlr]->edma_inuse);
-
-	if (count)
-		return -EBUSY;
-
-	for (j = i - num_slots + 1; j <= i; ++j)
-		memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(j),
-			&dummy_paramset, PARM_SIZE);
-
-	return EDMA_CTLR_CHAN(ctlr, i - num_slots + 1);
-}
-
 static int prepare_unused_channel_list(struct device *dev, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -818,185 +762,10 @@ void edma_free_slot(unsigned slot)
 }
 EXPORT_SYMBOL(edma_free_slot);
 
-
-/**
- * edma_alloc_cont_slots- alloc contiguous parameter RAM slots
- * The API will return the starting point of a set of
- * contiguous parameter RAM slots that have been requested
- *
- * @id: can only be EDMA_CONT_PARAMS_ANY or EDMA_CONT_PARAMS_FIXED_EXACT
- * or EDMA_CONT_PARAMS_FIXED_NOT_EXACT
- * @count: number of contiguous Paramter RAM slots
- * @slot  - the start value of Parameter RAM slot that should be passed if id
- * is EDMA_CONT_PARAMS_FIXED_EXACT or EDMA_CONT_PARAMS_FIXED_NOT_EXACT
- *
- * If id is EDMA_CONT_PARAMS_ANY then the API starts looking for a set of
- * contiguous Parameter RAM slots from parameter RAM 64 in the case of
- * DaVinci SOCs and 32 in the case of DA8xx SOCs.
- *
- * If id is EDMA_CONT_PARAMS_FIXED_EXACT then the API starts looking for a
- * set of contiguous parameter RAM slots from the "slot" that is passed as an
- * argument to the API.
- *
- * If id is EDMA_CONT_PARAMS_FIXED_NOT_EXACT then the API initially tries
- * starts looking for a set of contiguous parameter RAMs from the "slot"
- * that is passed as an argument to the API. On failure the API will try to
- * find a set of contiguous Parameter RAM slots from the remaining Parameter
- * RAM slots
- */
-int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count)
-{
-	/*
-	 * The start slot requested should be greater than
-	 * the number of channels and lesser than the total number
-	 * of slots
-	 */
-	if ((id != EDMA_CONT_PARAMS_ANY) &&
-		(slot < edma_cc[ctlr]->num_channels ||
-		slot >= edma_cc[ctlr]->num_slots))
-		return -EINVAL;
-
-	/*
-	 * The number of parameter RAM slots requested cannot be less than 1
-	 * and cannot be more than the number of slots minus the number of
-	 * channels
-	 */
-	if (count < 1 || count >
-		(edma_cc[ctlr]->num_slots - edma_cc[ctlr]->num_channels))
-		return -EINVAL;
-
-	switch (id) {
-	case EDMA_CONT_PARAMS_ANY:
-		return reserve_contiguous_slots(ctlr, id, count,
-						 edma_cc[ctlr]->num_channels);
-	case EDMA_CONT_PARAMS_FIXED_EXACT:
-	case EDMA_CONT_PARAMS_FIXED_NOT_EXACT:
-		return reserve_contiguous_slots(ctlr, id, count, slot);
-	default:
-		return -EINVAL;
-	}
-
-}
-EXPORT_SYMBOL(edma_alloc_cont_slots);
-
-/**
- * edma_free_cont_slots - deallocate DMA parameter RAM slots
- * @slot: first parameter RAM of a set of parameter RAM slots to be freed
- * @count: the number of contiguous parameter RAM slots to be freed
- *
- * This deallocates the parameter RAM slots allocated by
- * edma_alloc_cont_slots.
- * Callers/applications need to keep track of sets of contiguous
- * parameter RAM slots that have been allocated using the edma_alloc_cont_slots
- * API.
- * Callers are responsible for ensuring the slots are inactive, and will
- * not be activated.
- */
-int edma_free_cont_slots(unsigned slot, int count)
-{
-	unsigned ctlr, slot_to_free;
-	int i;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_channels ||
-		slot >= edma_cc[ctlr]->num_slots ||
-		count < 1)
-		return -EINVAL;
-
-	for (i = slot; i < slot + count; ++i) {
-		ctlr = EDMA_CTLR(i);
-		slot_to_free = EDMA_CHAN_SLOT(i);
-
-		memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot_to_free),
-			&dummy_paramset, PARM_SIZE);
-		clear_bit(slot_to_free, edma_cc[ctlr]->edma_inuse);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(edma_free_cont_slots);
-
 /*-----------------------------------------------------------------------*/
 
 /* Parameter RAM operations (i) -- read/write partial slots */
 
-/**
- * edma_set_src - set initial DMA source address in parameter RAM slot
- * @slot: parameter RAM slot being configured
- * @src_port: physical address of source (memory, controller FIFO, etc)
- * @addressMode: INCR, except in very rare cases
- * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the
- *	width to use when addressing the fifo (e.g. W8BIT, W32BIT)
- *
- * Note that the source address is modified during the DMA transfer
- * according to edma_set_src_index().
- */
-void edma_set_src(unsigned slot, dma_addr_t src_port,
-				enum address_mode mode, enum fifo_width width)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_slots) {
-		unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot);
-
-		if (mode) {
-			/* set SAM and program FWID */
-			i = (i & ~(EDMA_FWID)) | (SAM | ((width & 0x7) << 8));
-		} else {
-			/* clear SAM */
-			i &= ~SAM;
-		}
-		edma_parm_write(ctlr, PARM_OPT, slot, i);
-
-		/* set the source port address
-		   in source register of param structure */
-		edma_parm_write(ctlr, PARM_SRC, slot, src_port);
-	}
-}
-EXPORT_SYMBOL(edma_set_src);
-
-/**
- * edma_set_dest - set initial DMA destination address in parameter RAM slot
- * @slot: parameter RAM slot being configured
- * @dest_port: physical address of destination (memory, controller FIFO, etc)
- * @addressMode: INCR, except in very rare cases
- * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the
- *	width to use when addressing the fifo (e.g. W8BIT, W32BIT)
- *
- * Note that the destination address is modified during the DMA transfer
- * according to edma_set_dest_index().
- */
-void edma_set_dest(unsigned slot, dma_addr_t dest_port,
-				 enum address_mode mode, enum fifo_width width)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_slots) {
-		unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot);
-
-		if (mode) {
-			/* set DAM and program FWID */
-			i = (i & ~(EDMA_FWID)) | (DAM | ((width & 0x7) << 8));
-		} else {
-			/* clear DAM */
-			i &= ~DAM;
-		}
-		edma_parm_write(ctlr, PARM_OPT, slot, i);
-		/* set the destination port address
-		   in dest register of param structure */
-		edma_parm_write(ctlr, PARM_DST, slot, dest_port);
-	}
-}
-EXPORT_SYMBOL(edma_set_dest);
-
 /**
  * edma_get_position - returns the current transfer point
  * @slot: parameter RAM slot being examined
@@ -1016,110 +785,6 @@ dma_addr_t edma_get_position(unsigned slot, bool dst)
 	return edma_read(ctlr, offs);
 }
 
-/**
- * edma_set_src_index - configure DMA source address indexing
- * @slot: parameter RAM slot being configured
- * @src_bidx: byte offset between source arrays in a frame
- * @src_cidx: byte offset between source frames in a block
- *
- * Offsets are specified to support either contiguous or discontiguous
- * memory transfers, or repeated access to a hardware register, as needed.
- * When accessing hardware registers, both offsets are normally zero.
- */
-void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_slots) {
-		edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot,
-				0xffff0000, src_bidx);
-		edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot,
-				0xffff0000, src_cidx);
-	}
-}
-EXPORT_SYMBOL(edma_set_src_index);
-
-/**
- * edma_set_dest_index - configure DMA destination address indexing
- * @slot: parameter RAM slot being configured
- * @dest_bidx: byte offset between destination arrays in a frame
- * @dest_cidx: byte offset between destination frames in a block
- *
- * Offsets are specified to support either contiguous or discontiguous
- * memory transfers, or repeated access to a hardware register, as needed.
- * When accessing hardware registers, both offsets are normally zero.
- */
-void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_slots) {
-		edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot,
-				0x0000ffff, dest_bidx << 16);
-		edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot,
-				0x0000ffff, dest_cidx << 16);
-	}
-}
-EXPORT_SYMBOL(edma_set_dest_index);
-
-/**
- * edma_set_transfer_params - configure DMA transfer parameters
- * @slot: parameter RAM slot being configured
- * @acnt: how many bytes per array (at least one)
- * @bcnt: how many arrays per frame (at least one)
- * @ccnt: how many frames per block (at least one)
- * @bcnt_rld: used only for A-Synchronized transfers; this specifies
- *	the value to reload into bcnt when it decrements to zero
- * @sync_mode: ASYNC or ABSYNC
- *
- * See the EDMA3 documentation to understand how to configure and link
- * transfers using the fields in PaRAM slots.  If you are not doing it
- * all at once with edma_write_slot(), you will use this routine
- * plus two calls each for source and destination, setting the initial
- * address and saying how to index that address.
- *
- * An example of an A-Synchronized transfer is a serial link using a
- * single word shift register.  In that case, @acnt would be equal to
- * that word size; the serial controller issues a DMA synchronization
- * event to transfer each word, and memory access by the DMA transfer
- * controller will be word-at-a-time.
- *
- * An example of an AB-Synchronized transfer is a device using a FIFO.
- * In that case, @acnt equals the FIFO width and @bcnt equals its depth.
- * The controller with the FIFO issues DMA synchronization events when
- * the FIFO threshold is reached, and the DMA transfer controller will
- * transfer one frame to (or from) the FIFO.  It will probably use
- * efficient burst modes to access memory.
- */
-void edma_set_transfer_params(unsigned slot,
-		u16 acnt, u16 bcnt, u16 ccnt,
-		u16 bcnt_rld, enum sync_dimension sync_mode)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
-	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_slots) {
-		edma_parm_modify(ctlr, PARM_LINK_BCNTRLD, slot,
-				0x0000ffff, bcnt_rld << 16);
-		if (sync_mode == ASYNC)
-			edma_parm_and(ctlr, PARM_OPT, slot, ~SYNCDIM);
-		else
-			edma_parm_or(ctlr, PARM_OPT, slot, SYNCDIM);
-		/* Set the acount, bcount, ccount registers */
-		edma_parm_write(ctlr, PARM_A_B_CNT, slot, (bcnt << 16) | acnt);
-		edma_parm_write(ctlr, PARM_CCNT, slot, ccnt);
-	}
-}
-EXPORT_SYMBOL(edma_set_transfer_params);
-
 /**
  * edma_link - link one parameter RAM slot to another
  * @from: parameter RAM slot originating the link
@@ -1145,26 +810,6 @@ void edma_link(unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(edma_link);
 
-/**
- * edma_unlink - cut link from one parameter RAM slot
- * @from: parameter RAM slot originating the link
- *
- * The originating slot should not be part of any active DMA transfer.
- * Its link is set to 0xffff.
- */
-void edma_unlink(unsigned from)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(from);
-	from = EDMA_CHAN_SLOT(from);
-
-	if (from >= edma_cc[ctlr]->num_slots)
-		return;
-	edma_parm_or(ctlr, PARM_LINK_BCNTRLD, from, 0xffff);
-}
-EXPORT_SYMBOL(edma_unlink);
-
 /*-----------------------------------------------------------------------*/
 
 /* Parameter RAM operations (ii) -- read/write whole parameter sets */
@@ -1401,27 +1046,6 @@ void edma_clean_channel(unsigned channel)
 }
 EXPORT_SYMBOL(edma_clean_channel);
 
-/*
- * edma_clear_event - clear an outstanding event on the DMA channel
- * Arguments:
- *	channel - channel number
- */
-void edma_clear_event(unsigned channel)
-{
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel >= edma_cc[ctlr]->num_channels)
-		return;
-	if (channel < 32)
-		edma_write(ctlr, EDMA_ECR, BIT(channel));
-	else
-		edma_write(ctlr, EDMA_ECRH, BIT(channel - 32));
-}
-EXPORT_SYMBOL(edma_clear_event);
-
 /*
  * edma_assign_channel_eventq - move given channel to desired eventq
  * Arguments:
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index bdb2710e2aab..c1862423b356 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -72,20 +72,6 @@ struct edmacc_param {
 #define EDMA_DMA_TC1_ERROR 3
 #define EDMA_DMA_TC2_ERROR 4
 
-enum address_mode {
-	INCR = 0,
-	FIFO = 1
-};
-
-enum fifo_width {
-	W8BIT = 0,
-	W16BIT = 1,
-	W32BIT = 2,
-	W64BIT = 3,
-	W128BIT = 4,
-	W256BIT = 5
-};
-
 enum dma_event_q {
 	EVENTQ_0 = 0,
 	EVENTQ_1 = 1,
@@ -94,11 +80,6 @@ enum dma_event_q {
 	EVENTQ_DEFAULT = -1
 };
 
-enum sync_dimension {
-	ASYNC = 0,
-	ABSYNC = 1
-};
-
 #define EDMA_CTLR_CHAN(ctlr, chan)	(((ctlr) << 16) | (chan))
 #define EDMA_CTLR(i)			((i) >> 16)
 #define EDMA_CHAN_SLOT(i)		((i) & 0xffff)
@@ -121,22 +102,9 @@ void edma_free_channel(unsigned channel);
 int edma_alloc_slot(unsigned ctlr, int slot);
 void edma_free_slot(unsigned slot);
 
-/* alloc/free a set of contiguous parameter RAM slots */
-int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count);
-int edma_free_cont_slots(unsigned slot, int count);
-
 /* calls that operate on part of a parameter RAM slot */
-void edma_set_src(unsigned slot, dma_addr_t src_port,
-				enum address_mode mode, enum fifo_width);
-void edma_set_dest(unsigned slot, dma_addr_t dest_port,
-				 enum address_mode mode, enum fifo_width);
 dma_addr_t edma_get_position(unsigned slot, bool dst);
-void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx);
-void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx);
-void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt,
-		u16 bcnt_rld, enum sync_dimension sync_mode);
 void edma_link(unsigned from, unsigned to);
-void edma_unlink(unsigned from);
 
 /* calls that operate on an entire parameter RAM slot */
 void edma_write_slot(unsigned slot, const struct edmacc_param *params);
@@ -146,7 +114,6 @@ void edma_read_slot(unsigned slot, struct edmacc_param *params);
 int edma_start(unsigned channel);
 void edma_stop(unsigned channel);
 void edma_clean_channel(unsigned channel);
-void edma_clear_event(unsigned channel);
 void edma_pause(unsigned channel);
 void edma_resume(unsigned channel);
 
-- 
cgit v1.2.3


From ca304fa9bb762f091e851d48de43f623c975d47a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Oct 2015 14:42:49 +0300
Subject: ARM/dmaengine: edma: Public API to use private struct pointer

Instead of relying on indexes pointing to edma private data in the global
pointer array, pass the private data pointer via the public API.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/common/edma.c             | 305 ++++++++++++++++++-------------------
 drivers/dma/edma.c                 |  79 +++++-----
 include/linux/platform_data/edma.h |  38 +++--
 3 files changed, 214 insertions(+), 208 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 0b4c0ee59ed9..03692520812a 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -130,7 +130,7 @@ struct edma {
 
 	struct edma_soc_info *info;
 	int		id;
-
+	bool		unused_chan_list_done;
 	/* The edma_inuse bit for each PaRAM slot is clear unless the
 	 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
 	 */
@@ -264,7 +264,6 @@ static inline void clear_bits(int offset, int len, unsigned long *p)
 }
 
 /*****************************************************************************/
-static struct edma *edma_cc[EDMA_MAX_CC];
 static int arch_num_cc;
 
 /* dummy param set used to (re)initialize parameter RAM slots */
@@ -490,14 +489,18 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 static int prepare_unused_channel_list(struct device *dev, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
-	int i, count, ctlr;
+	struct edma *cc = data;
+	int i, count;
 	struct of_phandle_args  dma_spec;
 
 	if (dev->of_node) {
+		struct platform_device *dma_pdev;
+
 		count = of_property_count_strings(dev->of_node, "dma-names");
 		if (count < 0)
 			return 0;
 		for (i = 0; i < count; i++) {
+
 			if (of_parse_phandle_with_args(dev->of_node, "dmas",
 						       "#dma-cells", i,
 						       &dma_spec))
@@ -508,8 +511,12 @@ static int prepare_unused_channel_list(struct device *dev, void *data)
 				continue;
 			}
 
+			dma_pdev = of_find_device_by_node(dma_spec.np);
+			if (&dma_pdev->dev != cc->dev)
+				continue;
+
 			clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
-				  edma_cc[0]->edma_unused);
+				  cc->edma_unused);
 			of_node_put(dma_spec.np);
 		}
 		return 0;
@@ -517,11 +524,11 @@ static int prepare_unused_channel_list(struct device *dev, void *data)
 
 	/* For non-OF case */
 	for (i = 0; i < pdev->num_resources; i++) {
-		if ((pdev->resource[i].flags & IORESOURCE_DMA) &&
-				(int)pdev->resource[i].start >= 0) {
-			ctlr = EDMA_CTLR(pdev->resource[i].start);
+		struct resource	*res = &pdev->resource[i];
+
+		if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) {
 			clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
-				  edma_cc[ctlr]->edma_unused);
+				  cc->edma_unused);
 		}
 	}
 
@@ -530,8 +537,6 @@ static int prepare_unused_channel_list(struct device *dev, void *data)
 
 /*-----------------------------------------------------------------------*/
 
-static bool unused_chan_list_done;
-
 /* Resource alloc/free:  dma channels, parameter RAM slots */
 
 /**
@@ -564,77 +569,73 @@ static bool unused_chan_list_done;
  *
  * Returns the number of the channel, else negative errno.
  */
-int edma_alloc_channel(int channel,
+int edma_alloc_channel(struct edma *cc, int channel,
 		void (*callback)(unsigned channel, u16 ch_status, void *data),
 		void *data,
 		enum dma_event_q eventq_no)
 {
-	unsigned i, done = 0, ctlr = 0;
+	unsigned done = 0;
 	int ret = 0;
 
-	if (!unused_chan_list_done) {
+	if (!cc->unused_chan_list_done) {
 		/*
 		 * Scan all the platform devices to find out the EDMA channels
 		 * used and clear them in the unused list, making the rest
 		 * available for ARM usage.
 		 */
-		ret = bus_for_each_dev(&platform_bus_type, NULL, NULL,
-				prepare_unused_channel_list);
+		ret = bus_for_each_dev(&platform_bus_type, NULL, cc,
+				       prepare_unused_channel_list);
 		if (ret < 0)
 			return ret;
 
-		unused_chan_list_done = true;
+		cc->unused_chan_list_done = true;
 	}
 
 	if (channel >= 0) {
-		ctlr = EDMA_CTLR(channel);
+		if (cc->id != EDMA_CTLR(channel)) {
+			dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n",
+				__func__, cc->id, EDMA_CTLR(channel));
+			return -EINVAL;
+		}
 		channel = EDMA_CHAN_SLOT(channel);
 	}
 
 	if (channel < 0) {
-		for (i = 0; i < arch_num_cc; i++) {
-			channel = 0;
-			for (;;) {
-				channel = find_next_bit(edma_cc[i]->edma_unused,
-						edma_cc[i]->num_channels,
-						channel);
-				if (channel == edma_cc[i]->num_channels)
-					break;
-				if (!test_and_set_bit(channel,
-						edma_cc[i]->edma_inuse)) {
-					done = 1;
-					ctlr = i;
-					break;
-				}
-				channel++;
-			}
-			if (done)
+		channel = 0;
+		for (;;) {
+			channel = find_next_bit(cc->edma_unused,
+						cc->num_channels, channel);
+			if (channel == cc->num_channels)
+				break;
+			if (!test_and_set_bit(channel, cc->edma_inuse)) {
+				done = 1;
 				break;
+			}
+			channel++;
 		}
 		if (!done)
 			return -ENOMEM;
-	} else if (channel >= edma_cc[ctlr]->num_channels) {
+	} else if (channel >= cc->num_channels) {
 		return -EINVAL;
-	} else if (test_and_set_bit(channel, edma_cc[ctlr]->edma_inuse)) {
+	} else if (test_and_set_bit(channel, cc->edma_inuse)) {
 		return -EBUSY;
 	}
 
 	/* ensure access through shadow region 0 */
-	edma_or_array2(edma_cc[ctlr], EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
+	edma_or_array2(cc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
 
 	/* ensure no events are pending */
-	edma_stop(EDMA_CTLR_CHAN(ctlr, channel));
-	memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(channel), &dummy_paramset,
+	edma_stop(cc, EDMA_CTLR_CHAN(cc->id, channel));
+	memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset,
 		    PARM_SIZE);
 
 	if (callback)
-		setup_dma_interrupt(edma_cc[ctlr],
-				    EDMA_CTLR_CHAN(ctlr, channel), callback,
-				    data);
+		setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, channel),
+				    callback, data);
 
-	map_dmach_queue(edma_cc[ctlr], channel, eventq_no);
+	map_dmach_queue(cc, channel, eventq_no);
 
-	return EDMA_CTLR_CHAN(ctlr, channel);
+	return EDMA_CTLR_CHAN(cc->id, channel);
 }
 EXPORT_SYMBOL(edma_alloc_channel);
 
@@ -650,22 +651,25 @@ EXPORT_SYMBOL(edma_alloc_channel);
  * will not be reactivated by linking, chaining, or software calls to
  * edma_start().
  */
-void edma_free_channel(unsigned channel)
+void edma_free_channel(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
 
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel >= edma_cc[ctlr]->num_channels)
+	if (channel >= cc->num_channels)
 		return;
 
-	setup_dma_interrupt(edma_cc[ctlr], channel, NULL, NULL);
+	setup_dma_interrupt(cc, channel, NULL, NULL);
 	/* REVISIT should probably take out of shadow region 0 */
 
-	memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(channel), &dummy_paramset,
+	memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset,
 		    PARM_SIZE);
-	clear_bit(channel, edma_cc[ctlr]->edma_inuse);
+	clear_bit(channel, cc->edma_inuse);
 }
 EXPORT_SYMBOL(edma_free_channel);
 
@@ -683,35 +687,29 @@ EXPORT_SYMBOL(edma_free_channel);
  *
  * Returns the number of the slot, else negative errno.
  */
-int edma_alloc_slot(unsigned ctlr, int slot)
+int edma_alloc_slot(struct edma *cc, int slot)
 {
-	if (!edma_cc[ctlr])
-		return -EINVAL;
-
-	if (slot >= 0)
+	if (slot > 0)
 		slot = EDMA_CHAN_SLOT(slot);
-
 	if (slot < 0) {
-		slot = edma_cc[ctlr]->num_channels;
+		slot = cc->num_channels;
 		for (;;) {
-			slot = find_next_zero_bit(edma_cc[ctlr]->edma_inuse,
-					edma_cc[ctlr]->num_slots, slot);
-			if (slot == edma_cc[ctlr]->num_slots)
+			slot = find_next_zero_bit(cc->edma_inuse, cc->num_slots,
+						  slot);
+			if (slot == cc->num_slots)
 				return -ENOMEM;
-			if (!test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse))
+			if (!test_and_set_bit(slot, cc->edma_inuse))
 				break;
 		}
-	} else if (slot < edma_cc[ctlr]->num_channels ||
-			slot >= edma_cc[ctlr]->num_slots) {
+	} else if (slot < cc->num_channels || slot >= cc->num_slots) {
 		return -EINVAL;
-	} else if (test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse)) {
+	} else if (test_and_set_bit(slot, cc->edma_inuse)) {
 		return -EBUSY;
 	}
 
-	memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), &dummy_paramset,
-		    PARM_SIZE);
+	memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE);
 
-	return EDMA_CTLR_CHAN(ctlr, slot);
+	return slot;
 }
 EXPORT_SYMBOL(edma_alloc_slot);
 
@@ -723,20 +721,15 @@ EXPORT_SYMBOL(edma_alloc_slot);
  * Callers are responsible for ensuring the slot is inactive, and will
  * not be activated.
  */
-void edma_free_slot(unsigned slot)
+void edma_free_slot(struct edma *cc, unsigned slot)
 {
-	unsigned ctlr;
 
-	ctlr = EDMA_CTLR(slot);
 	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot < edma_cc[ctlr]->num_channels ||
-		slot >= edma_cc[ctlr]->num_slots)
+	if (slot < cc->num_channels || slot >= cc->num_slots)
 		return;
 
-	memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), &dummy_paramset,
-		    PARM_SIZE);
-	clear_bit(slot, edma_cc[ctlr]->edma_inuse);
+	memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE);
+	clear_bit(slot, cc->edma_inuse);
 }
 EXPORT_SYMBOL(edma_free_slot);
 
@@ -751,16 +744,15 @@ EXPORT_SYMBOL(edma_free_slot);
  *
  * Returns the position of the current active slot
  */
-dma_addr_t edma_get_position(unsigned slot, bool dst)
+dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst)
 {
-	u32 offs, ctlr = EDMA_CTLR(slot);
+	u32 offs;
 
 	slot = EDMA_CHAN_SLOT(slot);
-
 	offs = PARM_OFFSET(slot);
 	offs += dst ? PARM_DST : PARM_SRC;
 
-	return edma_read(edma_cc[ctlr], offs);
+	return edma_read(cc, offs);
 }
 
 /**
@@ -770,21 +762,15 @@ dma_addr_t edma_get_position(unsigned slot, bool dst)
  *
  * The originating slot should not be part of any active DMA transfer.
  */
-void edma_link(unsigned from, unsigned to)
+void edma_link(struct edma *cc, unsigned from, unsigned to)
 {
-	unsigned ctlr_from, ctlr_to;
-
-	ctlr_from = EDMA_CTLR(from);
 	from = EDMA_CHAN_SLOT(from);
-	ctlr_to = EDMA_CTLR(to);
 	to = EDMA_CHAN_SLOT(to);
-
-	if (from >= edma_cc[ctlr_from]->num_slots)
+	if (from >= cc->num_slots || to >= cc->num_slots)
 		return;
-	if (to >= edma_cc[ctlr_to]->num_slots)
-		return;
-	edma_parm_modify(edma_cc[ctlr_from], PARM_LINK_BCNTRLD, from, 0xffff0000,
-				PARM_OFFSET(to));
+
+	edma_parm_modify(cc, PARM_LINK_BCNTRLD, from, 0xffff0000,
+			 PARM_OFFSET(to));
 }
 EXPORT_SYMBOL(edma_link);
 
@@ -802,16 +788,13 @@ EXPORT_SYMBOL(edma_link);
  * calls to set up those parameters in small pieces, and provides
  * complete control over all transfer options.
  */
-void edma_write_slot(unsigned slot, const struct edmacc_param *param)
+void edma_write_slot(struct edma *cc, unsigned slot,
+		     const struct edmacc_param *param)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
 	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot >= edma_cc[ctlr]->num_slots)
+	if (slot >= cc->num_slots)
 		return;
-	memcpy_toio(edma_cc[ctlr]->base + PARM_OFFSET(slot), param, PARM_SIZE);
+	memcpy_toio(cc->base + PARM_OFFSET(slot), param, PARM_SIZE);
 }
 EXPORT_SYMBOL(edma_write_slot);
 
@@ -823,17 +806,12 @@ EXPORT_SYMBOL(edma_write_slot);
  * Use this to read data from a parameter RAM slot, perhaps to
  * save them as a template for later reuse.
  */
-void edma_read_slot(unsigned slot, struct edmacc_param *param)
+void edma_read_slot(struct edma *cc, unsigned slot, struct edmacc_param *param)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(slot);
 	slot = EDMA_CHAN_SLOT(slot);
-
-	if (slot >= edma_cc[ctlr]->num_slots)
+	if (slot >= cc->num_slots)
 		return;
-	memcpy_fromio(param, edma_cc[ctlr]->base + PARM_OFFSET(slot),
-		      PARM_SIZE);
+	memcpy_fromio(param, cc->base + PARM_OFFSET(slot), PARM_SIZE);
 }
 EXPORT_SYMBOL(edma_read_slot);
 
@@ -848,18 +826,19 @@ EXPORT_SYMBOL(edma_read_slot);
  * This temporarily disables EDMA hardware events on the specified channel,
  * preventing them from triggering new transfers on its behalf
  */
-void edma_pause(unsigned channel)
+void edma_pause(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel < edma_cc[ctlr]->num_channels) {
+	if (channel < cc->num_channels) {
 		unsigned int mask = BIT(channel & 0x1f);
 
-		edma_shadow0_write_array(edma_cc[ctlr], SH_EECR, channel >> 5,
-					 mask);
+		edma_shadow0_write_array(cc, SH_EECR, channel >> 5, mask);
 	}
 }
 EXPORT_SYMBOL(edma_pause);
@@ -870,36 +849,39 @@ EXPORT_SYMBOL(edma_pause);
  *
  * This re-enables EDMA hardware events on the specified channel.
  */
-void edma_resume(unsigned channel)
+void edma_resume(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel < edma_cc[ctlr]->num_channels) {
+	if (channel < cc->num_channels) {
 		unsigned int mask = BIT(channel & 0x1f);
 
-		edma_shadow0_write_array(edma_cc[ctlr], SH_EESR, channel >> 5,
-					 mask);
+		edma_shadow0_write_array(cc, SH_EESR, channel >> 5, mask);
 	}
 }
 EXPORT_SYMBOL(edma_resume);
 
-int edma_trigger_channel(unsigned channel)
+int edma_trigger_channel(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
 	unsigned int mask;
 
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return -EINVAL;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 	mask = BIT(channel & 0x1f);
 
-	edma_shadow0_write_array(edma_cc[ctlr], SH_ESR, (channel >> 5), mask);
+	edma_shadow0_write_array(cc, SH_ESR, (channel >> 5), mask);
 
 	pr_debug("EDMA: ESR%d %08x\n", (channel >> 5),
-		 edma_shadow0_read_array(edma_cc[ctlr], SH_ESR,
-					 (channel >> 5)));
+		 edma_shadow0_read_array(cc, SH_ESR, (channel >> 5)));
 	return 0;
 }
 EXPORT_SYMBOL(edma_trigger_channel);
@@ -915,15 +897,16 @@ EXPORT_SYMBOL(edma_trigger_channel);
  *
  * Returns zero on success, else negative errno.
  */
-int edma_start(unsigned channel)
+int edma_start(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return -EINVAL;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel < edma_cc[ctlr]->num_channels) {
-		struct edma *cc = edma_cc[ctlr];
+	if (channel < cc->num_channels) {
 		int j = channel >> 5;
 		unsigned int mask = BIT(channel & 0x1f);
 
@@ -962,15 +945,16 @@ EXPORT_SYMBOL(edma_start);
  * may not be resumed, and the channel's Parameter RAM should be
  * reinitialized before being reused.
  */
-void edma_stop(unsigned channel)
+void edma_stop(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel < edma_cc[ctlr]->num_channels) {
-		struct edma *cc = edma_cc[ctlr];
+	if (channel < cc->num_channels) {
 		int j = channel >> 5;
 		unsigned int mask = BIT(channel & 0x1f);
 
@@ -1005,15 +989,16 @@ EXPORT_SYMBOL(edma_stop);
  *
  *****************************************************************************/
 
-void edma_clean_channel(unsigned channel)
+void edma_clean_channel(struct edma *cc, unsigned channel)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel < edma_cc[ctlr]->num_channels) {
-		struct edma *cc = edma_cc[ctlr];
+	if (channel < cc->num_channels) {
 		int j = (channel >> 5);
 		unsigned int mask = BIT(channel & 0x1f);
 
@@ -1037,26 +1022,35 @@ EXPORT_SYMBOL(edma_clean_channel);
  *
  * Can be used to move a channel to a selected event queue.
  */
-void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no)
+void edma_assign_channel_eventq(struct edma *cc, unsigned channel,
+				enum dma_event_q eventq_no)
 {
-	unsigned ctlr;
-
-	ctlr = EDMA_CTLR(channel);
+	if (cc->id != EDMA_CTLR(channel)) {
+		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			cc->id, EDMA_CTLR(channel));
+		return;
+	}
 	channel = EDMA_CHAN_SLOT(channel);
 
-	if (channel >= edma_cc[ctlr]->num_channels)
+	if (channel >= cc->num_channels)
 		return;
 
 	/* default to low priority queue */
 	if (eventq_no == EVENTQ_DEFAULT)
-		eventq_no = edma_cc[ctlr]->default_queue;
-	if (eventq_no >= edma_cc[ctlr]->num_tc)
+		eventq_no = cc->default_queue;
+	if (eventq_no >= cc->num_tc)
 		return;
 
-	map_dmach_queue(edma_cc[ctlr], channel, eventq_no);
+	map_dmach_queue(cc, channel, eventq_no);
 }
 EXPORT_SYMBOL(edma_assign_channel_eventq);
 
+struct edma *edma_get_data(struct device *edma_dev)
+{
+	return dev_get_drvdata(edma_dev);
+}
+
+
 static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
 			      struct edma *edma_cc, int cc_id)
 {
@@ -1278,11 +1272,10 @@ static int edma_probe(struct platform_device *pdev)
 		}
 	}
 
-	edma_cc[dev_id] = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL);
-	if (!edma_cc[dev_id])
+	cc = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL);
+	if (!cc)
 		return -ENOMEM;
 
-	cc = edma_cc[dev_id];
 	cc->dev = dev;
 	cc->id = dev_id;
 	dev_set_drvdata(dev, cc);
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index fcb4680efed7..53d48b2a700d 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -119,6 +119,7 @@ struct edma_chan {
 };
 
 struct edma_cc {
+	struct edma			*cc;
 	int				ctlr;
 	struct dma_device		dma_slave;
 	struct edma_chan		slave_chans[EDMA_CHANS];
@@ -150,6 +151,7 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
 /* Dispatch a queued descriptor to the controller (caller holds lock) */
 static void edma_execute(struct edma_chan *echan)
 {
+	struct edma *cc = echan->ecc->cc;
 	struct virt_dma_desc *vdesc;
 	struct edma_desc *edesc;
 	struct device *dev = echan->vchan.chan.device->dev;
@@ -174,7 +176,7 @@ static void edma_execute(struct edma_chan *echan)
 	/* Write descriptor PaRAM set(s) */
 	for (i = 0; i < nslots; i++) {
 		j = i + edesc->processed;
-		edma_write_slot(echan->slot[i], &edesc->pset[j].param);
+		edma_write_slot(cc, echan->slot[i], &edesc->pset[j].param);
 		edesc->sg_len += edesc->pset[j].len;
 		dev_vdbg(echan->vchan.chan.device->dev,
 			"\n pset[%d]:\n"
@@ -199,7 +201,7 @@ static void edma_execute(struct edma_chan *echan)
 			edesc->pset[j].param.link_bcntrld);
 		/* Link to the previous slot if not the last set */
 		if (i != (nslots - 1))
-			edma_link(echan->slot[i], echan->slot[i+1]);
+			edma_link(cc, echan->slot[i], echan->slot[i+1]);
 	}
 
 	edesc->processed += nslots;
@@ -211,9 +213,9 @@ static void edma_execute(struct edma_chan *echan)
 	 */
 	if (edesc->processed == edesc->pset_nr) {
 		if (edesc->cyclic)
-			edma_link(echan->slot[nslots-1], echan->slot[1]);
+			edma_link(cc, echan->slot[nslots-1], echan->slot[1]);
 		else
-			edma_link(echan->slot[nslots-1],
+			edma_link(cc, echan->slot[nslots-1],
 				  echan->ecc->dummy_slot);
 	}
 
@@ -224,19 +226,19 @@ static void edma_execute(struct edma_chan *echan)
 		 * transfers of MAX_NR_SG
 		 */
 		dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
-		edma_clean_channel(echan->ch_num);
-		edma_stop(echan->ch_num);
-		edma_start(echan->ch_num);
-		edma_trigger_channel(echan->ch_num);
+		edma_clean_channel(cc, echan->ch_num);
+		edma_stop(cc, echan->ch_num);
+		edma_start(cc, echan->ch_num);
+		edma_trigger_channel(cc, echan->ch_num);
 		echan->missed = 0;
 	} else if (edesc->processed <= MAX_NR_SG) {
 		dev_dbg(dev, "first transfer starting on channel %d\n",
 			echan->ch_num);
-		edma_start(echan->ch_num);
+		edma_start(cc, echan->ch_num);
 	} else {
 		dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
 			echan->ch_num, edesc->processed);
-		edma_resume(echan->ch_num);
+		edma_resume(cc, echan->ch_num);
 	}
 }
 
@@ -254,10 +256,11 @@ static int edma_terminate_all(struct dma_chan *chan)
 	 * echan->edesc is NULL and exit.)
 	 */
 	if (echan->edesc) {
-		edma_stop(echan->ch_num);
+		edma_stop(echan->ecc->cc, echan->ch_num);
 		/* Move the cyclic channel back to default queue */
 		if (echan->edesc->cyclic)
-			edma_assign_channel_eventq(echan->ch_num,
+			edma_assign_channel_eventq(echan->ecc->cc,
+						   echan->ch_num,
 						   EVENTQ_DEFAULT);
 		/*
 		 * free the running request descriptor
@@ -295,7 +298,7 @@ static int edma_dma_pause(struct dma_chan *chan)
 	if (!echan->edesc)
 		return -EINVAL;
 
-	edma_pause(echan->ch_num);
+	edma_pause(echan->ecc->cc, echan->ch_num);
 	return 0;
 }
 
@@ -303,7 +306,7 @@ static int edma_dma_resume(struct dma_chan *chan)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 
-	edma_resume(echan->ch_num);
+	edma_resume(echan->ecc->cc, echan->ch_num);
 	return 0;
 }
 
@@ -485,8 +488,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 	for (i = 0; i < nslots; i++) {
 		if (echan->slot[i] < 0) {
 			echan->slot[i] =
-				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
-						EDMA_SLOT_ANY);
+				edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "%s: Failed to allocate slot\n",
@@ -641,8 +643,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 		/* Allocate a PaRAM slot, if needed */
 		if (echan->slot[i] < 0) {
 			echan->slot[i] =
-				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
-						EDMA_SLOT_ANY);
+				edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "%s: Failed to allocate slot\n",
@@ -703,7 +704,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	}
 
 	/* Place the cyclic channel to highest priority queue */
-	edma_assign_channel_eventq(echan->ch_num, EVENTQ_0);
+	edma_assign_channel_eventq(echan->ecc->cc, echan->ch_num, EVENTQ_0);
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
 }
@@ -711,6 +712,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 {
 	struct edma_chan *echan = data;
+	struct edma *cc = echan->ecc->cc;
 	struct device *dev = echan->vchan.chan.device->dev;
 	struct edma_desc *edesc;
 	struct edmacc_param p;
@@ -727,13 +729,13 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 			} else if (edesc->processed == edesc->pset_nr) {
 				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
 				edesc->residue = 0;
-				edma_stop(echan->ch_num);
+				edma_stop(cc, echan->ch_num);
 				vchan_cookie_complete(&edesc->vdesc);
 				echan->edesc = NULL;
 			} else {
 				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
 
-				edma_pause(echan->ch_num);
+				edma_pause(cc, echan->ch_num);
 
 				/* Update statistics for tx_status */
 				edesc->residue -= edesc->sg_len;
@@ -744,7 +746,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 		}
 		break;
 	case EDMA_DMA_CC_ERROR:
-		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
+		edma_read_slot(cc, echan->slot[0], &p);
 
 		/*
 		 * Issue later based on missed flag which will be sure
@@ -767,10 +769,10 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 			 * missed, so its safe to issue it here.
 			 */
 			dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n");
-			edma_clean_channel(echan->ch_num);
-			edma_stop(echan->ch_num);
-			edma_start(echan->ch_num);
-			edma_trigger_channel(echan->ch_num);
+			edma_clean_channel(cc, echan->ch_num);
+			edma_stop(cc, echan->ch_num);
+			edma_start(cc, echan->ch_num);
+			edma_trigger_channel(cc, echan->ch_num);
 		}
 		break;
 	default:
@@ -789,8 +791,8 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
 	int a_ch_num;
 	LIST_HEAD(descs);
 
-	a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
-					echan, EVENTQ_DEFAULT);
+	a_ch_num = edma_alloc_channel(echan->ecc->cc, echan->ch_num,
+				      edma_callback, echan, EVENTQ_DEFAULT);
 
 	if (a_ch_num < 0) {
 		ret = -ENODEV;
@@ -814,7 +816,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
 	return 0;
 
 err_wrong_chan:
-	edma_free_channel(a_ch_num);
+	edma_free_channel(echan->ecc->cc, a_ch_num);
 err_no_chan:
 	return ret;
 }
@@ -827,21 +829,21 @@ static void edma_free_chan_resources(struct dma_chan *chan)
 	int i;
 
 	/* Terminate transfers */
-	edma_stop(echan->ch_num);
+	edma_stop(echan->ecc->cc, echan->ch_num);
 
 	vchan_free_chan_resources(&echan->vchan);
 
 	/* Free EDMA PaRAM slots */
 	for (i = 1; i < EDMA_MAX_SLOTS; i++) {
 		if (echan->slot[i] >= 0) {
-			edma_free_slot(echan->slot[i]);
+			edma_free_slot(echan->ecc->cc, echan->slot[i]);
 			echan->slot[i] = -1;
 		}
 	}
 
 	/* Free EDMA channel */
 	if (echan->alloced) {
-		edma_free_channel(echan->ch_num);
+		edma_free_channel(echan->ecc->cc, echan->ch_num);
 		echan->alloced = false;
 	}
 
@@ -871,7 +873,8 @@ static u32 edma_residue(struct edma_desc *edesc)
 	 * We always read the dst/src position from the first RamPar
 	 * pset. That's the one which is active now.
 	 */
-	pos = edma_get_position(edesc->echan->slot[0], dst);
+	pos = edma_get_position(edesc->echan->ecc->cc, edesc->echan->slot[0],
+				dst);
 
 	/*
 	 * Cyclic is simple. Just subtract pset[0].addr from pos.
@@ -1008,8 +1011,12 @@ static int edma_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	ecc->cc = edma_get_data(pdev->dev.parent);
+	if (!ecc->cc)
+		return -ENODEV;
+
 	ecc->ctlr = pdev->id;
-	ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY);
+	ecc->dummy_slot = edma_alloc_slot(ecc->cc, EDMA_SLOT_ANY);
 	if (ecc->dummy_slot < 0) {
 		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
 		return ecc->dummy_slot;
@@ -1042,7 +1049,7 @@ static int edma_probe(struct platform_device *pdev)
 	return 0;
 
 err_reg1:
-	edma_free_slot(ecc->dummy_slot);
+	edma_free_slot(ecc->cc, ecc->dummy_slot);
 	return ret;
 }
 
@@ -1055,7 +1062,7 @@ static int edma_remove(struct platform_device *pdev)
 	if (parent_node)
 		of_dma_controller_free(parent_node);
 	dma_async_device_unregister(&ecc->dma_slave);
-	edma_free_slot(ecc->dummy_slot);
+	edma_free_slot(ecc->cc, ecc->dummy_slot);
 
 	return 0;
 }
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index c1862423b356..466021c03169 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -92,32 +92,40 @@ enum dma_event_q {
 
 #define EDMA_MAX_CC               2
 
+struct edma;
+
+struct edma *edma_get_data(struct device *edma_dev);
+
 /* alloc/free DMA channels and their dedicated parameter RAM slots */
-int edma_alloc_channel(int channel,
+int edma_alloc_channel(struct edma *cc, int channel,
 	void (*callback)(unsigned channel, u16 ch_status, void *data),
 	void *data, enum dma_event_q);
-void edma_free_channel(unsigned channel);
+void edma_free_channel(struct edma *cc, unsigned channel);
 
 /* alloc/free parameter RAM slots */
-int edma_alloc_slot(unsigned ctlr, int slot);
-void edma_free_slot(unsigned slot);
+int edma_alloc_slot(struct edma *cc, int slot);
+void edma_free_slot(struct edma *cc, unsigned slot);
 
 /* calls that operate on part of a parameter RAM slot */
-dma_addr_t edma_get_position(unsigned slot, bool dst);
-void edma_link(unsigned from, unsigned to);
+dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst);
+void edma_link(struct edma *cc, unsigned from, unsigned to);
 
 /* calls that operate on an entire parameter RAM slot */
-void edma_write_slot(unsigned slot, const struct edmacc_param *params);
-void edma_read_slot(unsigned slot, struct edmacc_param *params);
+void edma_write_slot(struct edma *cc, unsigned slot,
+		     const struct edmacc_param *params);
+void edma_read_slot(struct edma *cc, unsigned slot,
+		    struct edmacc_param *params);
 
 /* channel control operations */
-int edma_start(unsigned channel);
-void edma_stop(unsigned channel);
-void edma_clean_channel(unsigned channel);
-void edma_pause(unsigned channel);
-void edma_resume(unsigned channel);
+int edma_start(struct edma *cc, unsigned channel);
+void edma_stop(struct edma *cc, unsigned channel);
+void edma_clean_channel(struct edma *cc, unsigned channel);
+void edma_pause(struct edma *cc, unsigned channel);
+void edma_resume(struct edma *cc, unsigned channel);
+int edma_trigger_channel(struct edma *cc, unsigned channel);
 
-void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no);
+void edma_assign_channel_eventq(struct edma *cc, unsigned channel,
+				enum dma_event_q eventq_no);
 
 struct edma_rsv_info {
 
@@ -141,6 +149,4 @@ struct edma_soc_info {
 	const s16	(*xbar_chans)[2];
 };
 
-int edma_trigger_channel(unsigned);
-
 #endif
-- 
cgit v1.2.3


From 2b6b3b7420190888793c49e97276e1e73bd7eaed Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Oct 2015 14:42:53 +0300
Subject: ARM/dmaengine: edma: Merge the two drivers under drivers/dma/

Move the code out from arch/arm/common and merge it inside of the dmaengine
driver.
This change is done with as minimal (if any) functional change to the code
as possible to avoid introducing regression.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/Kconfig                   |    1 -
 arch/arm/common/Kconfig            |    3 -
 arch/arm/common/Makefile           |    1 -
 arch/arm/common/edma.c             | 1431 ----------------------------------
 arch/arm/mach-omap2/Kconfig        |    1 -
 drivers/dma/Kconfig                |    1 -
 drivers/dma/edma.c                 | 1506 ++++++++++++++++++++++++++++++++++--
 include/linux/platform_data/edma.h |   74 --
 8 files changed, 1431 insertions(+), 1587 deletions(-)
 delete mode 100644 arch/arm/common/edma.c

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 72ad724c67ae..513e38701418 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -736,7 +736,6 @@ config ARCH_DAVINCI
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_CHIP
 	select HAVE_IDE
-	select TI_PRIV_EDMA
 	select USE_OF
 	select ZONE_DMA
 	help
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index c3a4e9ceba34..9353184d730d 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -17,6 +17,3 @@ config SHARP_PARAM
 
 config SHARP_SCOOP
 	bool
-
-config TI_PRIV_EDMA
-	bool
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 6ee5959a813b..27f23b15b1ea 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -15,6 +15,5 @@ obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
 CFLAGS_REMOVE_mcpm_entry.o	= -pg
 AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
-obj-$(CONFIG_TI_PRIV_EDMA)	+= edma.o
 obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
 obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
deleted file mode 100644
index 5b747f1bc8b5..000000000000
--- a/arch/arm/common/edma.c
+++ /dev/null
@@ -1,1431 +0,0 @@
-/*
- * EDMA3 support for DaVinci
- *
- * Copyright (C) 2006-2009 Texas Instruments.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/edma.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/pm_runtime.h>
-
-#include <linux/platform_data/edma.h>
-
-/* Offsets matching "struct edmacc_param" */
-#define PARM_OPT		0x00
-#define PARM_SRC		0x04
-#define PARM_A_B_CNT		0x08
-#define PARM_DST		0x0c
-#define PARM_SRC_DST_BIDX	0x10
-#define PARM_LINK_BCNTRLD	0x14
-#define PARM_SRC_DST_CIDX	0x18
-#define PARM_CCNT		0x1c
-
-#define PARM_SIZE		0x20
-
-/* Offsets for EDMA CC global channel registers and their shadows */
-#define SH_ER		0x00	/* 64 bits */
-#define SH_ECR		0x08	/* 64 bits */
-#define SH_ESR		0x10	/* 64 bits */
-#define SH_CER		0x18	/* 64 bits */
-#define SH_EER		0x20	/* 64 bits */
-#define SH_EECR		0x28	/* 64 bits */
-#define SH_EESR		0x30	/* 64 bits */
-#define SH_SER		0x38	/* 64 bits */
-#define SH_SECR		0x40	/* 64 bits */
-#define SH_IER		0x50	/* 64 bits */
-#define SH_IECR		0x58	/* 64 bits */
-#define SH_IESR		0x60	/* 64 bits */
-#define SH_IPR		0x68	/* 64 bits */
-#define SH_ICR		0x70	/* 64 bits */
-#define SH_IEVAL	0x78
-#define SH_QER		0x80
-#define SH_QEER		0x84
-#define SH_QEECR	0x88
-#define SH_QEESR	0x8c
-#define SH_QSER		0x90
-#define SH_QSECR	0x94
-#define SH_SIZE		0x200
-
-/* Offsets for EDMA CC global registers */
-#define EDMA_REV	0x0000
-#define EDMA_CCCFG	0x0004
-#define EDMA_QCHMAP	0x0200	/* 8 registers */
-#define EDMA_DMAQNUM	0x0240	/* 8 registers (4 on OMAP-L1xx) */
-#define EDMA_QDMAQNUM	0x0260
-#define EDMA_QUETCMAP	0x0280
-#define EDMA_QUEPRI	0x0284
-#define EDMA_EMR	0x0300	/* 64 bits */
-#define EDMA_EMCR	0x0308	/* 64 bits */
-#define EDMA_QEMR	0x0310
-#define EDMA_QEMCR	0x0314
-#define EDMA_CCERR	0x0318
-#define EDMA_CCERRCLR	0x031c
-#define EDMA_EEVAL	0x0320
-#define EDMA_DRAE	0x0340	/* 4 x 64 bits*/
-#define EDMA_QRAE	0x0380	/* 4 registers */
-#define EDMA_QUEEVTENTRY	0x0400	/* 2 x 16 registers */
-#define EDMA_QSTAT	0x0600	/* 2 registers */
-#define EDMA_QWMTHRA	0x0620
-#define EDMA_QWMTHRB	0x0624
-#define EDMA_CCSTAT	0x0640
-
-#define EDMA_M		0x1000	/* global channel registers */
-#define EDMA_ECR	0x1008
-#define EDMA_ECRH	0x100C
-#define EDMA_SHADOW0	0x2000	/* 4 regions shadowing global channels */
-#define EDMA_PARM	0x4000	/* 128 param entries */
-
-#define PARM_OFFSET(param_no)	(EDMA_PARM + ((param_no) << 5))
-
-#define EDMA_DCHMAP	0x0100  /* 64 registers */
-
-/* CCCFG register */
-#define GET_NUM_DMACH(x)	(x & 0x7) /* bits 0-2 */
-#define GET_NUM_PAENTRY(x)	((x & 0x7000) >> 12) /* bits 12-14 */
-#define GET_NUM_EVQUE(x)	((x & 0x70000) >> 16) /* bits 16-18 */
-#define GET_NUM_REGN(x)		((x & 0x300000) >> 20) /* bits 20-21 */
-#define CHMAP_EXIST		BIT(24)
-
-#define EDMA_MAX_DMACH           64
-#define EDMA_MAX_PARAMENTRY     512
-
-/*****************************************************************************/
-struct edma {
-	struct device	*dev;
-	void __iomem *base;
-
-	/* how many dma resources of each type */
-	unsigned	num_channels;
-	unsigned	num_region;
-	unsigned	num_slots;
-	unsigned	num_tc;
-	enum dma_event_q 	default_queue;
-
-	/* list of channels with no even trigger; terminated by "-1" */
-	const s8	*noevent;
-
-	struct edma_soc_info *info;
-	int		id;
-	bool		unused_chan_list_done;
-	/* The edma_inuse bit for each PaRAM slot is clear unless the
-	 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
-	 */
-	DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY);
-
-	/* The edma_unused bit for each channel is clear unless
-	 * it is not being used on this platform. It uses a bit
-	 * of SOC-specific initialization code.
-	 */
-	DECLARE_BITMAP(edma_unused, EDMA_MAX_DMACH);
-
-	struct dma_interrupt_data {
-		void (*callback)(unsigned channel, unsigned short ch_status,
-				void *data);
-		void *data;
-	} intr_data[EDMA_MAX_DMACH];
-};
-/*****************************************************************************/
-
-static inline unsigned int edma_read(struct edma *cc, int offset)
-{
-	return (unsigned int)__raw_readl(cc->base + offset);
-}
-
-static inline void edma_write(struct edma *cc, int offset, int val)
-{
-	__raw_writel(val, cc->base + offset);
-}
-static inline void edma_modify(struct edma *cc, int offset, unsigned and,
-			       unsigned or)
-{
-	unsigned val = edma_read(cc, offset);
-	val &= and;
-	val |= or;
-	edma_write(cc, offset, val);
-}
-static inline void edma_and(struct edma *cc, int offset, unsigned and)
-{
-	unsigned val = edma_read(cc, offset);
-	val &= and;
-	edma_write(cc, offset, val);
-}
-static inline void edma_or(struct edma *cc, int offset, unsigned or)
-{
-	unsigned val = edma_read(cc, offset);
-	val |= or;
-	edma_write(cc, offset, val);
-}
-static inline unsigned int edma_read_array(struct edma *cc, int offset, int i)
-{
-	return edma_read(cc, offset + (i << 2));
-}
-static inline void edma_write_array(struct edma *cc, int offset, int i,
-		unsigned val)
-{
-	edma_write(cc, offset + (i << 2), val);
-}
-static inline void edma_modify_array(struct edma *cc, int offset, int i,
-		unsigned and, unsigned or)
-{
-	edma_modify(cc, offset + (i << 2), and, or);
-}
-static inline void edma_or_array(struct edma *cc, int offset, int i, unsigned or)
-{
-	edma_or(cc, offset + (i << 2), or);
-}
-static inline void edma_or_array2(struct edma *cc, int offset, int i, int j,
-		unsigned or)
-{
-	edma_or(cc, offset + ((i*2 + j) << 2), or);
-}
-static inline void edma_write_array2(struct edma *cc, int offset, int i, int j,
-		unsigned val)
-{
-	edma_write(cc, offset + ((i*2 + j) << 2), val);
-}
-static inline unsigned int edma_shadow0_read(struct edma *cc, int offset)
-{
-	return edma_read(cc, EDMA_SHADOW0 + offset);
-}
-static inline unsigned int edma_shadow0_read_array(struct edma *cc, int offset,
-		int i)
-{
-	return edma_read(cc, EDMA_SHADOW0 + offset + (i << 2));
-}
-static inline void edma_shadow0_write(struct edma *cc, int offset, unsigned val)
-{
-	edma_write(cc, EDMA_SHADOW0 + offset, val);
-}
-static inline void edma_shadow0_write_array(struct edma *cc, int offset, int i,
-		unsigned val)
-{
-	edma_write(cc, EDMA_SHADOW0 + offset + (i << 2), val);
-}
-static inline unsigned int edma_parm_read(struct edma *cc, int offset,
-		int param_no)
-{
-	return edma_read(cc, EDMA_PARM + offset + (param_no << 5));
-}
-static inline void edma_parm_write(struct edma *cc, int offset, int param_no,
-		unsigned val)
-{
-	edma_write(cc, EDMA_PARM + offset + (param_no << 5), val);
-}
-static inline void edma_parm_modify(struct edma *cc, int offset, int param_no,
-		unsigned and, unsigned or)
-{
-	edma_modify(cc, EDMA_PARM + offset + (param_no << 5), and, or);
-}
-static inline void edma_parm_and(struct edma *cc, int offset, int param_no,
-		unsigned and)
-{
-	edma_and(cc, EDMA_PARM + offset + (param_no << 5), and);
-}
-static inline void edma_parm_or(struct edma *cc, int offset, int param_no,
-		unsigned or)
-{
-	edma_or(cc, EDMA_PARM + offset + (param_no << 5), or);
-}
-
-static inline void set_bits(int offset, int len, unsigned long *p)
-{
-	for (; len > 0; len--)
-		set_bit(offset + (len - 1), p);
-}
-
-static inline void clear_bits(int offset, int len, unsigned long *p)
-{
-	for (; len > 0; len--)
-		clear_bit(offset + (len - 1), p);
-}
-
-/*****************************************************************************/
-static int arch_num_cc;
-
-/* dummy param set used to (re)initialize parameter RAM slots */
-static const struct edmacc_param dummy_paramset = {
-	.link_bcntrld = 0xffff,
-	.ccnt = 1,
-};
-
-static const struct of_device_id edma_of_ids[] = {
-	{ .compatible = "ti,edma3", },
-	{}
-};
-
-/*****************************************************************************/
-
-static void map_dmach_queue(struct edma *cc, unsigned ch_no,
-			    enum dma_event_q queue_no)
-{
-	int bit = (ch_no & 0x7) * 4;
-
-	/* default to low priority queue */
-	if (queue_no == EVENTQ_DEFAULT)
-		queue_no = cc->default_queue;
-
-	queue_no &= 7;
-	edma_modify_array(cc, EDMA_DMAQNUM, (ch_no >> 3),
-			  ~(0x7 << bit), queue_no << bit);
-}
-
-static void assign_priority_to_queue(struct edma *cc, int queue_no,
-				     int priority)
-{
-	int bit = queue_no * 4;
-	edma_modify(cc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit));
-}
-
-/**
- * map_dmach_param - Maps channel number to param entry number
- *
- * This maps the dma channel number to param entry numberter. In
- * other words using the DMA channel mapping registers a param entry
- * can be mapped to any channel
- *
- * Callers are responsible for ensuring the channel mapping logic is
- * included in that particular EDMA variant (Eg : dm646x)
- *
- */
-static void map_dmach_param(struct edma *cc)
-{
-	int i;
-	for (i = 0; i < EDMA_MAX_DMACH; i++)
-		edma_write_array(cc, EDMA_DCHMAP , i , (i << 5));
-}
-
-static inline void setup_dma_interrupt(struct edma *cc, unsigned lch,
-	void (*callback)(unsigned channel, u16 ch_status, void *data),
-	void *data)
-{
-	lch = EDMA_CHAN_SLOT(lch);
-
-	if (!callback)
-		edma_shadow0_write_array(cc, SH_IECR, lch >> 5,
-					 BIT(lch & 0x1f));
-
-	cc->intr_data[lch].callback = callback;
-	cc->intr_data[lch].data = data;
-
-	if (callback) {
-		edma_shadow0_write_array(cc, SH_ICR, lch >> 5, BIT(lch & 0x1f));
-		edma_shadow0_write_array(cc, SH_IESR, lch >> 5,
-					 BIT(lch & 0x1f));
-	}
-}
-
-/******************************************************************************
- *
- * DMA interrupt handler
- *
- *****************************************************************************/
-static irqreturn_t dma_irq_handler(int irq, void *data)
-{
-	struct edma *cc = data;
-	int ctlr;
-	u32 sh_ier;
-	u32 sh_ipr;
-	u32 bank;
-
-	ctlr = cc->id;
-	if (ctlr < 0)
-		return IRQ_NONE;
-
-	dev_dbg(cc->dev, "dma_irq_handler\n");
-
-	sh_ipr = edma_shadow0_read_array(cc, SH_IPR, 0);
-	if (!sh_ipr) {
-		sh_ipr = edma_shadow0_read_array(cc, SH_IPR, 1);
-		if (!sh_ipr)
-			return IRQ_NONE;
-		sh_ier = edma_shadow0_read_array(cc, SH_IER, 1);
-		bank = 1;
-	} else {
-		sh_ier = edma_shadow0_read_array(cc, SH_IER, 0);
-		bank = 0;
-	}
-
-	do {
-		u32 slot;
-		u32 channel;
-
-		dev_dbg(cc->dev, "IPR%d %08x\n", bank, sh_ipr);
-
-		slot = __ffs(sh_ipr);
-		sh_ipr &= ~(BIT(slot));
-
-		if (sh_ier & BIT(slot)) {
-			channel = (bank << 5) | slot;
-			/* Clear the corresponding IPR bits */
-			edma_shadow0_write_array(cc, SH_ICR, bank, BIT(slot));
-			if (cc->intr_data[channel].callback)
-				cc->intr_data[channel].callback(
-					EDMA_CTLR_CHAN(ctlr, channel),
-					EDMA_DMA_COMPLETE,
-					cc->intr_data[channel].data);
-		}
-	} while (sh_ipr);
-
-	edma_shadow0_write(cc, SH_IEVAL, 1);
-	return IRQ_HANDLED;
-}
-
-/******************************************************************************
- *
- * DMA error interrupt handler
- *
- *****************************************************************************/
-static irqreturn_t dma_ccerr_handler(int irq, void *data)
-{
-	struct edma *cc = data;
-	int i;
-	int ctlr;
-	unsigned int cnt = 0;
-
-	ctlr = cc->id;
-	if (ctlr < 0)
-		return IRQ_NONE;
-
-	dev_dbg(cc->dev, "dma_ccerr_handler\n");
-
-	if ((edma_read_array(cc, EDMA_EMR, 0) == 0) &&
-	    (edma_read_array(cc, EDMA_EMR, 1) == 0) &&
-	    (edma_read(cc, EDMA_QEMR) == 0) &&
-	    (edma_read(cc, EDMA_CCERR) == 0))
-		return IRQ_NONE;
-
-	while (1) {
-		int j = -1;
-		if (edma_read_array(cc, EDMA_EMR, 0))
-			j = 0;
-		else if (edma_read_array(cc, EDMA_EMR, 1))
-			j = 1;
-		if (j >= 0) {
-			dev_dbg(cc->dev, "EMR%d %08x\n", j,
-				edma_read_array(cc, EDMA_EMR, j));
-			for (i = 0; i < 32; i++) {
-				int k = (j << 5) + i;
-				if (edma_read_array(cc, EDMA_EMR, j) &
-							BIT(i)) {
-					/* Clear the corresponding EMR bits */
-					edma_write_array(cc, EDMA_EMCR, j,
-							 BIT(i));
-					/* Clear any SER */
-					edma_shadow0_write_array(cc, SH_SECR,
-								j, BIT(i));
-					if (cc->intr_data[k].callback) {
-						cc->intr_data[k].callback(
-							EDMA_CTLR_CHAN(ctlr, k),
-							EDMA_DMA_CC_ERROR,
-							cc->intr_data[k].data);
-					}
-				}
-			}
-		} else if (edma_read(cc, EDMA_QEMR)) {
-			dev_dbg(cc->dev, "QEMR %02x\n",
-				edma_read(cc, EDMA_QEMR));
-			for (i = 0; i < 8; i++) {
-				if (edma_read(cc, EDMA_QEMR) & BIT(i)) {
-					/* Clear the corresponding IPR bits */
-					edma_write(cc, EDMA_QEMCR, BIT(i));
-					edma_shadow0_write(cc, SH_QSECR,
-							   BIT(i));
-
-					/* NOTE:  not reported!! */
-				}
-			}
-		} else if (edma_read(cc, EDMA_CCERR)) {
-			dev_dbg(cc->dev, "CCERR %08x\n",
-				edma_read(cc, EDMA_CCERR));
-			/* FIXME:  CCERR.BIT(16) ignored!  much better
-			 * to just write CCERRCLR with CCERR value...
-			 */
-			for (i = 0; i < 8; i++) {
-				if (edma_read(cc, EDMA_CCERR) & BIT(i)) {
-					/* Clear the corresponding IPR bits */
-					edma_write(cc, EDMA_CCERRCLR, BIT(i));
-
-					/* NOTE:  not reported!! */
-				}
-			}
-		}
-		if ((edma_read_array(cc, EDMA_EMR, 0) == 0) &&
-		    (edma_read_array(cc, EDMA_EMR, 1) == 0) &&
-		    (edma_read(cc, EDMA_QEMR) == 0) &&
-		    (edma_read(cc, EDMA_CCERR) == 0))
-			break;
-		cnt++;
-		if (cnt > 10)
-			break;
-	}
-	edma_write(cc, EDMA_EEVAL, 1);
-	return IRQ_HANDLED;
-}
-
-static int prepare_unused_channel_list(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct edma *cc = data;
-	int i, count;
-	struct of_phandle_args  dma_spec;
-
-	if (dev->of_node) {
-		struct platform_device *dma_pdev;
-
-		count = of_property_count_strings(dev->of_node, "dma-names");
-		if (count < 0)
-			return 0;
-		for (i = 0; i < count; i++) {
-
-			if (of_parse_phandle_with_args(dev->of_node, "dmas",
-						       "#dma-cells", i,
-						       &dma_spec))
-				continue;
-
-			if (!of_match_node(edma_of_ids, dma_spec.np)) {
-				of_node_put(dma_spec.np);
-				continue;
-			}
-
-			dma_pdev = of_find_device_by_node(dma_spec.np);
-			if (&dma_pdev->dev != cc->dev)
-				continue;
-
-			clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
-				  cc->edma_unused);
-			of_node_put(dma_spec.np);
-		}
-		return 0;
-	}
-
-	/* For non-OF case */
-	for (i = 0; i < pdev->num_resources; i++) {
-		struct resource	*res = &pdev->resource[i];
-
-		if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0) {
-			clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
-				  cc->edma_unused);
-		}
-	}
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------------*/
-
-/* Resource alloc/free:  dma channels, parameter RAM slots */
-
-/**
- * edma_alloc_channel - allocate DMA channel and paired parameter RAM
- * @channel: specific channel to allocate; negative for "any unmapped channel"
- * @callback: optional; to be issued on DMA completion or errors
- * @data: passed to callback
- * @eventq_no: an EVENTQ_* constant, used to choose which Transfer
- *	Controller (TC) executes requests using this channel.  Use
- *	EVENTQ_DEFAULT unless you really need a high priority queue.
- *
- * This allocates a DMA channel and its associated parameter RAM slot.
- * The parameter RAM is initialized to hold a dummy transfer.
- *
- * Normal use is to pass a specific channel number as @channel, to make
- * use of hardware events mapped to that channel.  When the channel will
- * be used only for software triggering or event chaining, channels not
- * mapped to hardware events (or mapped to unused events) are preferable.
- *
- * DMA transfers start from a channel using edma_start(), or by
- * chaining.  When the transfer described in that channel's parameter RAM
- * slot completes, that slot's data may be reloaded through a link.
- *
- * DMA errors are only reported to the @callback associated with the
- * channel driving that transfer, but transfer completion callbacks can
- * be sent to another channel under control of the TCC field in
- * the option word of the transfer's parameter RAM set.  Drivers must not
- * use DMA transfer completion callbacks for channels they did not allocate.
- * (The same applies to TCC codes used in transfer chaining.)
- *
- * Returns the number of the channel, else negative errno.
- */
-int edma_alloc_channel(struct edma *cc, int channel,
-		void (*callback)(unsigned channel, u16 ch_status, void *data),
-		void *data,
-		enum dma_event_q eventq_no)
-{
-	unsigned done = 0;
-	int ret = 0;
-
-	if (!cc->unused_chan_list_done) {
-		/*
-		 * Scan all the platform devices to find out the EDMA channels
-		 * used and clear them in the unused list, making the rest
-		 * available for ARM usage.
-		 */
-		ret = bus_for_each_dev(&platform_bus_type, NULL, cc,
-				       prepare_unused_channel_list);
-		if (ret < 0)
-			return ret;
-
-		cc->unused_chan_list_done = true;
-	}
-
-	if (channel >= 0) {
-		if (cc->id != EDMA_CTLR(channel)) {
-			dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n",
-				__func__, cc->id, EDMA_CTLR(channel));
-			return -EINVAL;
-		}
-		channel = EDMA_CHAN_SLOT(channel);
-	}
-
-	if (channel < 0) {
-		channel = 0;
-		for (;;) {
-			channel = find_next_bit(cc->edma_unused,
-						cc->num_channels, channel);
-			if (channel == cc->num_channels)
-				break;
-			if (!test_and_set_bit(channel, cc->edma_inuse)) {
-				done = 1;
-				break;
-			}
-			channel++;
-		}
-		if (!done)
-			return -ENOMEM;
-	} else if (channel >= cc->num_channels) {
-		return -EINVAL;
-	} else if (test_and_set_bit(channel, cc->edma_inuse)) {
-		return -EBUSY;
-	}
-
-	/* ensure access through shadow region 0 */
-	edma_or_array2(cc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
-
-	/* ensure no events are pending */
-	edma_stop(cc, EDMA_CTLR_CHAN(cc->id, channel));
-	memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset,
-		    PARM_SIZE);
-
-	if (callback)
-		setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, channel),
-				    callback, data);
-
-	map_dmach_queue(cc, channel, eventq_no);
-
-	return EDMA_CTLR_CHAN(cc->id, channel);
-}
-EXPORT_SYMBOL(edma_alloc_channel);
-
-
-/**
- * edma_free_channel - deallocate DMA channel
- * @channel: dma channel returned from edma_alloc_channel()
- *
- * This deallocates the DMA channel and associated parameter RAM slot
- * allocated by edma_alloc_channel().
- *
- * Callers are responsible for ensuring the channel is inactive, and
- * will not be reactivated by linking, chaining, or software calls to
- * edma_start().
- */
-void edma_free_channel(struct edma *cc, unsigned channel)
-{
-
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel >= cc->num_channels)
-		return;
-
-	setup_dma_interrupt(cc, channel, NULL, NULL);
-	/* REVISIT should probably take out of shadow region 0 */
-
-	memcpy_toio(cc->base + PARM_OFFSET(channel), &dummy_paramset,
-		    PARM_SIZE);
-	clear_bit(channel, cc->edma_inuse);
-}
-EXPORT_SYMBOL(edma_free_channel);
-
-/**
- * edma_alloc_slot - allocate DMA parameter RAM
- * @slot: specific slot to allocate; negative for "any unused slot"
- *
- * This allocates a parameter RAM slot, initializing it to hold a
- * dummy transfer.  Slots allocated using this routine have not been
- * mapped to a hardware DMA channel, and will normally be used by
- * linking to them from a slot associated with a DMA channel.
- *
- * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific
- * slots may be allocated on behalf of DSP firmware.
- *
- * Returns the number of the slot, else negative errno.
- */
-int edma_alloc_slot(struct edma *cc, int slot)
-{
-	if (slot > 0)
-		slot = EDMA_CHAN_SLOT(slot);
-	if (slot < 0) {
-		slot = cc->num_channels;
-		for (;;) {
-			slot = find_next_zero_bit(cc->edma_inuse, cc->num_slots,
-						  slot);
-			if (slot == cc->num_slots)
-				return -ENOMEM;
-			if (!test_and_set_bit(slot, cc->edma_inuse))
-				break;
-		}
-	} else if (slot < cc->num_channels || slot >= cc->num_slots) {
-		return -EINVAL;
-	} else if (test_and_set_bit(slot, cc->edma_inuse)) {
-		return -EBUSY;
-	}
-
-	memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE);
-
-	return slot;
-}
-EXPORT_SYMBOL(edma_alloc_slot);
-
-/**
- * edma_free_slot - deallocate DMA parameter RAM
- * @slot: parameter RAM slot returned from edma_alloc_slot()
- *
- * This deallocates the parameter RAM slot allocated by edma_alloc_slot().
- * Callers are responsible for ensuring the slot is inactive, and will
- * not be activated.
- */
-void edma_free_slot(struct edma *cc, unsigned slot)
-{
-
-	slot = EDMA_CHAN_SLOT(slot);
-	if (slot < cc->num_channels || slot >= cc->num_slots)
-		return;
-
-	memcpy_toio(cc->base + PARM_OFFSET(slot), &dummy_paramset, PARM_SIZE);
-	clear_bit(slot, cc->edma_inuse);
-}
-EXPORT_SYMBOL(edma_free_slot);
-
-/*-----------------------------------------------------------------------*/
-
-/* Parameter RAM operations (i) -- read/write partial slots */
-
-/**
- * edma_get_position - returns the current transfer point
- * @slot: parameter RAM slot being examined
- * @dst:  true selects the dest position, false the source
- *
- * Returns the position of the current active slot
- */
-dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst)
-{
-	u32 offs;
-
-	slot = EDMA_CHAN_SLOT(slot);
-	offs = PARM_OFFSET(slot);
-	offs += dst ? PARM_DST : PARM_SRC;
-
-	return edma_read(cc, offs);
-}
-
-/**
- * edma_link - link one parameter RAM slot to another
- * @from: parameter RAM slot originating the link
- * @to: parameter RAM slot which is the link target
- *
- * The originating slot should not be part of any active DMA transfer.
- */
-void edma_link(struct edma *cc, unsigned from, unsigned to)
-{
-	from = EDMA_CHAN_SLOT(from);
-	to = EDMA_CHAN_SLOT(to);
-	if (from >= cc->num_slots || to >= cc->num_slots)
-		return;
-
-	edma_parm_modify(cc, PARM_LINK_BCNTRLD, from, 0xffff0000,
-			 PARM_OFFSET(to));
-}
-EXPORT_SYMBOL(edma_link);
-
-/*-----------------------------------------------------------------------*/
-
-/* Parameter RAM operations (ii) -- read/write whole parameter sets */
-
-/**
- * edma_write_slot - write parameter RAM data for slot
- * @slot: number of parameter RAM slot being modified
- * @param: data to be written into parameter RAM slot
- *
- * Use this to assign all parameters of a transfer at once.  This
- * allows more efficient setup of transfers than issuing multiple
- * calls to set up those parameters in small pieces, and provides
- * complete control over all transfer options.
- */
-void edma_write_slot(struct edma *cc, unsigned slot,
-		     const struct edmacc_param *param)
-{
-	slot = EDMA_CHAN_SLOT(slot);
-	if (slot >= cc->num_slots)
-		return;
-	memcpy_toio(cc->base + PARM_OFFSET(slot), param, PARM_SIZE);
-}
-EXPORT_SYMBOL(edma_write_slot);
-
-/**
- * edma_read_slot - read parameter RAM data from slot
- * @slot: number of parameter RAM slot being copied
- * @param: where to store copy of parameter RAM data
- *
- * Use this to read data from a parameter RAM slot, perhaps to
- * save them as a template for later reuse.
- */
-void edma_read_slot(struct edma *cc, unsigned slot, struct edmacc_param *param)
-{
-	slot = EDMA_CHAN_SLOT(slot);
-	if (slot >= cc->num_slots)
-		return;
-	memcpy_fromio(param, cc->base + PARM_OFFSET(slot), PARM_SIZE);
-}
-EXPORT_SYMBOL(edma_read_slot);
-
-/*-----------------------------------------------------------------------*/
-
-/* Various EDMA channel control operations */
-
-/**
- * edma_pause - pause dma on a channel
- * @channel: on which edma_start() has been called
- *
- * This temporarily disables EDMA hardware events on the specified channel,
- * preventing them from triggering new transfers on its behalf
- */
-void edma_pause(struct edma *cc, unsigned channel)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel < cc->num_channels) {
-		unsigned int mask = BIT(channel & 0x1f);
-
-		edma_shadow0_write_array(cc, SH_EECR, channel >> 5, mask);
-	}
-}
-EXPORT_SYMBOL(edma_pause);
-
-/**
- * edma_resume - resumes dma on a paused channel
- * @channel: on which edma_pause() has been called
- *
- * This re-enables EDMA hardware events on the specified channel.
- */
-void edma_resume(struct edma *cc, unsigned channel)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel < cc->num_channels) {
-		unsigned int mask = BIT(channel & 0x1f);
-
-		edma_shadow0_write_array(cc, SH_EESR, channel >> 5, mask);
-	}
-}
-EXPORT_SYMBOL(edma_resume);
-
-int edma_trigger_channel(struct edma *cc, unsigned channel)
-{
-	unsigned int mask;
-
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return -EINVAL;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-	mask = BIT(channel & 0x1f);
-
-	edma_shadow0_write_array(cc, SH_ESR, (channel >> 5), mask);
-
-	pr_debug("EDMA: ESR%d %08x\n", (channel >> 5),
-		 edma_shadow0_read_array(cc, SH_ESR, (channel >> 5)));
-	return 0;
-}
-EXPORT_SYMBOL(edma_trigger_channel);
-
-/**
- * edma_start - start dma on a channel
- * @channel: channel being activated
- *
- * Channels with event associations will be triggered by their hardware
- * events, and channels without such associations will be triggered by
- * software.  (At this writing there is no interface for using software
- * triggers except with channels that don't support hardware triggers.)
- *
- * Returns zero on success, else negative errno.
- */
-int edma_start(struct edma *cc, unsigned channel)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return -EINVAL;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel < cc->num_channels) {
-		int j = channel >> 5;
-		unsigned int mask = BIT(channel & 0x1f);
-
-		/* EDMA channels without event association */
-		if (test_bit(channel, cc->edma_unused)) {
-			pr_debug("EDMA: ESR%d %08x\n", j,
-				 edma_shadow0_read_array(cc, SH_ESR, j));
-			edma_shadow0_write_array(cc, SH_ESR, j, mask);
-			return 0;
-		}
-
-		/* EDMA channel with event association */
-		pr_debug("EDMA: ER%d %08x\n", j,
-			edma_shadow0_read_array(cc, SH_ER, j));
-		/* Clear any pending event or error */
-		edma_write_array(cc, EDMA_ECR, j, mask);
-		edma_write_array(cc, EDMA_EMCR, j, mask);
-		/* Clear any SER */
-		edma_shadow0_write_array(cc, SH_SECR, j, mask);
-		edma_shadow0_write_array(cc, SH_EESR, j, mask);
-		pr_debug("EDMA: EER%d %08x\n", j,
-			 edma_shadow0_read_array(cc, SH_EER, j));
-		return 0;
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL(edma_start);
-
-/**
- * edma_stop - stops dma on the channel passed
- * @channel: channel being deactivated
- *
- * When @lch is a channel, any active transfer is paused and
- * all pending hardware events are cleared.  The current transfer
- * may not be resumed, and the channel's Parameter RAM should be
- * reinitialized before being reused.
- */
-void edma_stop(struct edma *cc, unsigned channel)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel < cc->num_channels) {
-		int j = channel >> 5;
-		unsigned int mask = BIT(channel & 0x1f);
-
-		edma_shadow0_write_array(cc, SH_EECR, j, mask);
-		edma_shadow0_write_array(cc, SH_ECR, j, mask);
-		edma_shadow0_write_array(cc, SH_SECR, j, mask);
-		edma_write_array(cc, EDMA_EMCR, j, mask);
-
-		/* clear possibly pending completion interrupt */
-		edma_shadow0_write_array(cc, SH_ICR, j, mask);
-
-		pr_debug("EDMA: EER%d %08x\n", j,
-			 edma_shadow0_read_array(cc, SH_EER, j));
-
-		/* REVISIT:  consider guarding against inappropriate event
-		 * chaining by overwriting with dummy_paramset.
-		 */
-	}
-}
-EXPORT_SYMBOL(edma_stop);
-
-/******************************************************************************
- *
- * It cleans ParamEntry qand bring back EDMA to initial state if media has
- * been removed before EDMA has finished.It is usedful for removable media.
- * Arguments:
- *      ch_no     - channel no
- *
- * Return: zero on success, or corresponding error no on failure
- *
- * FIXME this should not be needed ... edma_stop() should suffice.
- *
- *****************************************************************************/
-
-void edma_clean_channel(struct edma *cc, unsigned channel)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel < cc->num_channels) {
-		int j = (channel >> 5);
-		unsigned int mask = BIT(channel & 0x1f);
-
-		pr_debug("EDMA: EMR%d %08x\n", j,
-			 edma_read_array(cc, EDMA_EMR, j));
-		edma_shadow0_write_array(cc, SH_ECR, j, mask);
-		/* Clear the corresponding EMR bits */
-		edma_write_array(cc, EDMA_EMCR, j, mask);
-		/* Clear any SER */
-		edma_shadow0_write_array(cc, SH_SECR, j, mask);
-		edma_write(cc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0));
-	}
-}
-EXPORT_SYMBOL(edma_clean_channel);
-
-/*
- * edma_assign_channel_eventq - move given channel to desired eventq
- * Arguments:
- *	channel - channel number
- *	eventq_no - queue to move the channel
- *
- * Can be used to move a channel to a selected event queue.
- */
-void edma_assign_channel_eventq(struct edma *cc, unsigned channel,
-				enum dma_event_q eventq_no)
-{
-	if (cc->id != EDMA_CTLR(channel)) {
-		dev_err(cc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
-			cc->id, EDMA_CTLR(channel));
-		return;
-	}
-	channel = EDMA_CHAN_SLOT(channel);
-
-	if (channel >= cc->num_channels)
-		return;
-
-	/* default to low priority queue */
-	if (eventq_no == EVENTQ_DEFAULT)
-		eventq_no = cc->default_queue;
-	if (eventq_no >= cc->num_tc)
-		return;
-
-	map_dmach_queue(cc, channel, eventq_no);
-}
-EXPORT_SYMBOL(edma_assign_channel_eventq);
-
-struct edma *edma_get_data(struct device *edma_dev)
-{
-	return dev_get_drvdata(edma_dev);
-}
-
-
-static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
-			      struct edma *edma_cc, int cc_id)
-{
-	int i;
-	u32 value, cccfg;
-	s8 (*queue_priority_map)[2];
-
-	/* Decode the eDMA3 configuration from CCCFG register */
-	cccfg = edma_read(edma_cc, EDMA_CCCFG);
-
-	value = GET_NUM_REGN(cccfg);
-	edma_cc->num_region = BIT(value);
-
-	value = GET_NUM_DMACH(cccfg);
-	edma_cc->num_channels = BIT(value + 1);
-
-	value = GET_NUM_PAENTRY(cccfg);
-	edma_cc->num_slots = BIT(value + 4);
-
-	value = GET_NUM_EVQUE(cccfg);
-	edma_cc->num_tc = value + 1;
-
-	dev_dbg(dev, "eDMA3 CC%d HW configuration (cccfg: 0x%08x):\n", cc_id,
-		cccfg);
-	dev_dbg(dev, "num_region: %u\n", edma_cc->num_region);
-	dev_dbg(dev, "num_channel: %u\n", edma_cc->num_channels);
-	dev_dbg(dev, "num_slot: %u\n", edma_cc->num_slots);
-	dev_dbg(dev, "num_tc: %u\n", edma_cc->num_tc);
-
-	/* Nothing need to be done if queue priority is provided */
-	if (pdata->queue_priority_mapping)
-		return 0;
-
-	/*
-	 * Configure TC/queue priority as follows:
-	 * Q0 - priority 0
-	 * Q1 - priority 1
-	 * Q2 - priority 2
-	 * ...
-	 * The meaning of priority numbers: 0 highest priority, 7 lowest
-	 * priority. So Q0 is the highest priority queue and the last queue has
-	 * the lowest priority.
-	 */
-	queue_priority_map = devm_kzalloc(dev,
-					  (edma_cc->num_tc + 1) * sizeof(s8),
-					  GFP_KERNEL);
-	if (!queue_priority_map)
-		return -ENOMEM;
-
-	for (i = 0; i < edma_cc->num_tc; i++) {
-		queue_priority_map[i][0] = i;
-		queue_priority_map[i][1] = i;
-	}
-	queue_priority_map[i][0] = -1;
-	queue_priority_map[i][1] = -1;
-
-	pdata->queue_priority_mapping = queue_priority_map;
-	/* Default queue has the lowest priority */
-	pdata->default_queue = i - 1;
-
-	return 0;
-}
-
-#if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_DMADEVICES)
-
-static int edma_xbar_event_map(struct device *dev, struct device_node *node,
-			       struct edma_soc_info *pdata, size_t sz)
-{
-	const char pname[] = "ti,edma-xbar-event-map";
-	struct resource res;
-	void __iomem *xbar;
-	s16 (*xbar_chans)[2];
-	size_t nelm = sz / sizeof(s16);
-	u32 shift, offset, mux;
-	int ret, i;
-
-	xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL);
-	if (!xbar_chans)
-		return -ENOMEM;
-
-	ret = of_address_to_resource(node, 1, &res);
-	if (ret)
-		return -ENOMEM;
-
-	xbar = devm_ioremap(dev, res.start, resource_size(&res));
-	if (!xbar)
-		return -ENOMEM;
-
-	ret = of_property_read_u16_array(node, pname, (u16 *)xbar_chans, nelm);
-	if (ret)
-		return -EIO;
-
-	/* Invalidate last entry for the other user of this mess */
-	nelm >>= 1;
-	xbar_chans[nelm][0] = xbar_chans[nelm][1] = -1;
-
-	for (i = 0; i < nelm; i++) {
-		shift = (xbar_chans[i][1] & 0x03) << 3;
-		offset = xbar_chans[i][1] & 0xfffffffc;
-		mux = readl(xbar + offset);
-		mux &= ~(0xff << shift);
-		mux |= xbar_chans[i][0] << shift;
-		writel(mux, (xbar + offset));
-	}
-
-	pdata->xbar_chans = (const s16 (*)[2]) xbar_chans;
-	return 0;
-}
-
-static int edma_of_parse_dt(struct device *dev,
-			    struct device_node *node,
-			    struct edma_soc_info *pdata)
-{
-	int ret = 0;
-	struct property *prop;
-	size_t sz;
-	struct edma_rsv_info *rsv_info;
-
-	rsv_info = devm_kzalloc(dev, sizeof(struct edma_rsv_info), GFP_KERNEL);
-	if (!rsv_info)
-		return -ENOMEM;
-	pdata->rsv = rsv_info;
-
-	prop = of_find_property(node, "ti,edma-xbar-event-map", &sz);
-	if (prop)
-		ret = edma_xbar_event_map(dev, node, pdata, sz);
-
-	return ret;
-}
-
-static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
-						      struct device_node *node)
-{
-	struct edma_soc_info *info;
-	int ret;
-
-	info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL);
-	if (!info)
-		return ERR_PTR(-ENOMEM);
-
-	ret = edma_of_parse_dt(dev, node, info);
-	if (ret)
-		return ERR_PTR(ret);
-
-	return info;
-}
-#else
-static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
-						      struct device_node *node)
-{
-	return ERR_PTR(-ENOSYS);
-}
-#endif
-
-static int edma_probe(struct platform_device *pdev)
-{
-	struct edma_soc_info	*info = pdev->dev.platform_data;
-	s8		(*queue_priority_mapping)[2];
-	int			i, off, ln;
-	const s16		(*rsv_chans)[2];
-	const s16		(*rsv_slots)[2];
-	const s16		(*xbar_chans)[2];
-	int			irq;
-	char			*irq_name;
-	struct resource		*mem;
-	struct device_node	*node = pdev->dev.of_node;
-	struct device		*dev = &pdev->dev;
-	int			dev_id = pdev->id;
-	struct edma		*cc;
-	int			ret;
-	struct platform_device_info edma_dev_info = {
-		.name = "edma-dma-engine",
-		.dma_mask = DMA_BIT_MASK(32),
-		.parent = &pdev->dev,
-	};
-
-	if (node) {
-		info = edma_setup_info_from_dt(dev, node);
-		if (IS_ERR(info)) {
-			dev_err(dev, "failed to get DT data\n");
-			return PTR_ERR(info);
-		}
-	}
-
-	if (!info)
-		return -ENODEV;
-
-	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
-		dev_err(dev, "pm_runtime_get_sync() failed\n");
-		return ret;
-	}
-
-	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc");
-	if (!mem) {
-		dev_dbg(dev, "mem resource not found, using index 0\n");
-		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!mem) {
-			dev_err(dev, "no mem resource?\n");
-			return -ENODEV;
-		}
-	}
-
-	cc = devm_kzalloc(dev, sizeof(struct edma), GFP_KERNEL);
-	if (!cc)
-		return -ENOMEM;
-
-	cc->dev = dev;
-	cc->id = dev_id;
-	/* When booting with DT the pdev->id is -1 */
-	if (dev_id < 0) {
-		cc->id = 0;
-		dev_id = arch_num_cc;
-	}
-	dev_set_drvdata(dev, cc);
-
-	cc->base = devm_ioremap_resource(dev, mem);
-	if (IS_ERR(cc->base))
-		return PTR_ERR(cc->base);
-
-	/* Get eDMA3 configuration from IP */
-	ret = edma_setup_from_hw(dev, info, cc, dev_id);
-	if (ret)
-		return ret;
-
-	cc->default_queue = info->default_queue;
-
-	for (i = 0; i < cc->num_slots; i++)
-		memcpy_toio(cc->base + PARM_OFFSET(i), &dummy_paramset,
-			    PARM_SIZE);
-
-	/* Mark all channels as unused */
-	memset(cc->edma_unused, 0xff, sizeof(cc->edma_unused));
-
-	if (info->rsv) {
-
-		/* Clear the reserved channels in unused list */
-		rsv_chans = info->rsv->rsv_chans;
-		if (rsv_chans) {
-			for (i = 0; rsv_chans[i][0] != -1; i++) {
-				off = rsv_chans[i][0];
-				ln = rsv_chans[i][1];
-				clear_bits(off, ln, cc->edma_unused);
-			}
-		}
-
-		/* Set the reserved slots in inuse list */
-		rsv_slots = info->rsv->rsv_slots;
-		if (rsv_slots) {
-			for (i = 0; rsv_slots[i][0] != -1; i++) {
-				off = rsv_slots[i][0];
-				ln = rsv_slots[i][1];
-				set_bits(off, ln, cc->edma_inuse);
-			}
-		}
-	}
-
-	/* Clear the xbar mapped channels in unused list */
-	xbar_chans = info->xbar_chans;
-	if (xbar_chans) {
-		for (i = 0; xbar_chans[i][1] != -1; i++) {
-			off = xbar_chans[i][1];
-			clear_bits(off, 1, cc->edma_unused);
-		}
-	}
-
-	irq = platform_get_irq_byname(pdev, "edma3_ccint");
-	if (irq < 0 && node)
-		irq = irq_of_parse_and_map(node, 0);
-
-	if (irq >= 0) {
-		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
-					  dev_name(dev));
-		ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
-				       cc);
-		if (ret) {
-			dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
-			return ret;
-		}
-	}
-
-	irq = platform_get_irq_byname(pdev, "edma3_ccerrint");
-	if (irq < 0 && node)
-		irq = irq_of_parse_and_map(node, 2);
-
-	if (irq >= 0) {
-		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
-					  dev_name(dev));
-		ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
-				       cc);
-		if (ret) {
-			dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
-			return ret;
-		}
-	}
-
-	for (i = 0; i < cc->num_channels; i++)
-		map_dmach_queue(cc, i, info->default_queue);
-
-	queue_priority_mapping = info->queue_priority_mapping;
-
-	/* Event queue priority mapping */
-	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
-		assign_priority_to_queue(cc, queue_priority_mapping[i][0],
-					 queue_priority_mapping[i][1]);
-
-	/* Map the channel to param entry if channel mapping logic exist */
-	if (edma_read(cc, EDMA_CCCFG) & CHMAP_EXIST)
-		map_dmach_param(cc);
-
-	for (i = 0; i < cc->num_region; i++) {
-		edma_write_array2(cc, EDMA_DRAE, i, 0, 0x0);
-		edma_write_array2(cc, EDMA_DRAE, i, 1, 0x0);
-		edma_write_array(cc, EDMA_QRAE, i, 0x0);
-	}
-	cc->info = info;
-	arch_num_cc++;
-
-	edma_dev_info.id = dev_id;
-
-	platform_device_register_full(&edma_dev_info);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int edma_pm_resume(struct device *dev)
-{
-	struct edma *cc = dev_get_drvdata(dev);
-	int i;
-	s8 (*queue_priority_mapping)[2];
-
-	queue_priority_mapping = cc->info->queue_priority_mapping;
-
-	/* Event queue priority mapping */
-	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
-		assign_priority_to_queue(cc, queue_priority_mapping[i][0],
-					 queue_priority_mapping[i][1]);
-
-	/* Map the channel to param entry if channel mapping logic */
-	if (edma_read(cc, EDMA_CCCFG) & CHMAP_EXIST)
-		map_dmach_param(cc);
-
-	for (i = 0; i < cc->num_channels; i++) {
-		if (test_bit(i, cc->edma_inuse)) {
-			/* ensure access through shadow region 0 */
-			edma_or_array2(cc, EDMA_DRAE, 0, i >> 5, BIT(i & 0x1f));
-
-			setup_dma_interrupt(cc, EDMA_CTLR_CHAN(cc->id, i),
-					    cc->intr_data[i].callback,
-					    cc->intr_data[i].data);
-		}
-	}
-
-	return 0;
-}
-#endif
-
-static const struct dev_pm_ops edma_pm_ops = {
-	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume)
-};
-
-static struct platform_driver edma_driver = {
-	.driver = {
-		.name	= "edma",
-		.pm	= &edma_pm_ops,
-		.of_match_table = edma_of_ids,
-	},
-	.probe = edma_probe,
-};
-
-static int __init edma_init(void)
-{
-	return platform_driver_probe(&edma_driver, edma_probe);
-}
-arch_initcall(edma_init);
-
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 07d2e100caab..e0b6736db984 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -90,7 +90,6 @@ config ARCH_OMAP2PLUS
 	select OMAP_GPMC
 	select PINCTRL
 	select SOC_BUS
-	select TI_PRIV_EDMA
 	select OMAP_IRQCHIP
 	help
 	  Systems based on OMAP2, OMAP3, OMAP4 or OMAP5
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b4584757dae0..992efc8e465e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -486,7 +486,6 @@ config TI_EDMA
 	depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
-	select TI_PRIV_EDMA
 	default n
 	help
 	  Enable support for the TI EDMA controller. This DMA
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index fc91ab9dd1bb..aeb67e0cc523 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -26,12 +26,92 @@
 #include <linux/spinlock.h>
 #include <linux/of.h>
 #include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/platform_data/edma.h>
 
 #include "dmaengine.h"
 #include "virt-dma.h"
 
+/* Offsets matching "struct edmacc_param" */
+#define PARM_OPT		0x00
+#define PARM_SRC		0x04
+#define PARM_A_B_CNT		0x08
+#define PARM_DST		0x0c
+#define PARM_SRC_DST_BIDX	0x10
+#define PARM_LINK_BCNTRLD	0x14
+#define PARM_SRC_DST_CIDX	0x18
+#define PARM_CCNT		0x1c
+
+#define PARM_SIZE		0x20
+
+/* Offsets for EDMA CC global channel registers and their shadows */
+#define SH_ER			0x00	/* 64 bits */
+#define SH_ECR			0x08	/* 64 bits */
+#define SH_ESR			0x10	/* 64 bits */
+#define SH_CER			0x18	/* 64 bits */
+#define SH_EER			0x20	/* 64 bits */
+#define SH_EECR			0x28	/* 64 bits */
+#define SH_EESR			0x30	/* 64 bits */
+#define SH_SER			0x38	/* 64 bits */
+#define SH_SECR			0x40	/* 64 bits */
+#define SH_IER			0x50	/* 64 bits */
+#define SH_IECR			0x58	/* 64 bits */
+#define SH_IESR			0x60	/* 64 bits */
+#define SH_IPR			0x68	/* 64 bits */
+#define SH_ICR			0x70	/* 64 bits */
+#define SH_IEVAL		0x78
+#define SH_QER			0x80
+#define SH_QEER			0x84
+#define SH_QEECR		0x88
+#define SH_QEESR		0x8c
+#define SH_QSER			0x90
+#define SH_QSECR		0x94
+#define SH_SIZE			0x200
+
+/* Offsets for EDMA CC global registers */
+#define EDMA_REV		0x0000
+#define EDMA_CCCFG		0x0004
+#define EDMA_QCHMAP		0x0200	/* 8 registers */
+#define EDMA_DMAQNUM		0x0240	/* 8 registers (4 on OMAP-L1xx) */
+#define EDMA_QDMAQNUM		0x0260
+#define EDMA_QUETCMAP		0x0280
+#define EDMA_QUEPRI		0x0284
+#define EDMA_EMR		0x0300	/* 64 bits */
+#define EDMA_EMCR		0x0308	/* 64 bits */
+#define EDMA_QEMR		0x0310
+#define EDMA_QEMCR		0x0314
+#define EDMA_CCERR		0x0318
+#define EDMA_CCERRCLR		0x031c
+#define EDMA_EEVAL		0x0320
+#define EDMA_DRAE		0x0340	/* 4 x 64 bits*/
+#define EDMA_QRAE		0x0380	/* 4 registers */
+#define EDMA_QUEEVTENTRY	0x0400	/* 2 x 16 registers */
+#define EDMA_QSTAT		0x0600	/* 2 registers */
+#define EDMA_QWMTHRA		0x0620
+#define EDMA_QWMTHRB		0x0624
+#define EDMA_CCSTAT		0x0640
+
+#define EDMA_M			0x1000	/* global channel registers */
+#define EDMA_ECR		0x1008
+#define EDMA_ECRH		0x100C
+#define EDMA_SHADOW0		0x2000	/* 4 shadow regions */
+#define EDMA_PARM		0x4000	/* PaRAM entries */
+
+#define PARM_OFFSET(param_no)	(EDMA_PARM + ((param_no) << 5))
+
+#define EDMA_DCHMAP		0x0100  /* 64 registers */
+
+/* CCCFG register: field extractors (argument parenthesised for safe expansion) */
+#define GET_NUM_DMACH(x)	((x) & 0x7) /* bits 0-2 */
+#define GET_NUM_PAENTRY(x)	(((x) & 0x7000) >> 12) /* bits 12-14 */
+#define GET_NUM_EVQUE(x)	(((x) & 0x70000) >> 16) /* bits 16-18 */
+#define GET_NUM_REGN(x)		(((x) & 0x300000) >> 20) /* bits 20-21 */
+#define CHMAP_EXIST		BIT(24)
+
 /*
  * This will go away when the private EDMA API is folded
  * into this driver and the platform device(s) are
@@ -60,6 +140,47 @@
 #define EDMA_MAX_SLOTS		MAX_NR_SG
 #define EDMA_DESCRIPTORS	16
 
+#define EDMA_MAX_PARAMENTRY     512
+
+#define EDMA_CHANNEL_ANY		-1	/* for edma_alloc_channel() */
+#define EDMA_SLOT_ANY			-1	/* for edma_alloc_slot() */
+#define EDMA_CONT_PARAMS_ANY		 1001
+#define EDMA_CONT_PARAMS_FIXED_EXACT	 1002
+#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003
+
+#define EDMA_MAX_CC               2
+
+/* PaRAM slots are laid out like this */
+struct edmacc_param {
+	u32 opt;
+	u32 src;
+	u32 a_b_cnt;
+	u32 dst;
+	u32 src_dst_bidx;
+	u32 link_bcntrld;
+	u32 src_dst_cidx;
+	u32 ccnt;
+} __packed;
+
+/* fields in edmacc_param.opt */
+#define SAM		BIT(0)
+#define DAM		BIT(1)
+#define SYNCDIM		BIT(2)
+#define STATIC		BIT(3)
+#define EDMA_FWID	(0x07 << 8)
+#define TCCMODE		BIT(11)
+#define EDMA_TCC(t)	((t) << 12)
+#define TCINTEN		BIT(20)
+#define ITCINTEN	BIT(21)
+#define TCCHEN		BIT(22)
+#define ITCCHEN		BIT(23)
+
+/* Possible values of the callback function's ch_status parameter */
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
+
 struct edma_pset {
 	u32				len;
 	dma_addr_t			addr;
@@ -119,14 +240,929 @@ struct edma_chan {
 };
 
 struct edma_cc {
-	struct edma			*cc;
-	int				ctlr;
+	struct device			*dev;
+	struct edma_soc_info		*info;
+	void __iomem			*base;
+	int				id;
+
+	/* eDMA3 resource information */
+	unsigned			num_channels;
+	unsigned			num_region;
+	unsigned			num_slots;
+	unsigned			num_tc;
+	enum dma_event_q		default_queue;
+
+	bool				unused_chan_list_done;
+	/* The edma_inuse bit for each PaRAM slot is clear unless the
+	 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
+	 */
+	DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY);
+
+	/* The edma_unused bit for each channel is clear unless
+	 * it is not being used on this platform. It uses a bit
+	 * of SOC-specific initialization code.
+	 */
+	DECLARE_BITMAP(edma_unused, EDMA_CHANS);
+
+	struct dma_interrupt_data {
+		void (*callback)(unsigned channel, unsigned short ch_status,
+				 void *data);
+		void *data;
+	} intr_data[EDMA_CHANS];
+
 	struct dma_device		dma_slave;
 	struct edma_chan		slave_chans[EDMA_CHANS];
-	int				num_slave_chans;
 	int				dummy_slot;
 };
 
+/* dummy param set used to (re)initialize parameter RAM slots */
+static const struct edmacc_param dummy_paramset = {
+	.link_bcntrld = 0xffff,
+	.ccnt = 1,
+};
+
+static const struct of_device_id edma_of_ids[] = {
+	{ .compatible = "ti,edma3", },
+	{}
+};
+
+static inline unsigned int edma_read(struct edma_cc *ecc, int offset)
+{
+	return (unsigned int)__raw_readl(ecc->base + offset);
+}
+
+static inline void edma_write(struct edma_cc *ecc, int offset, int val)
+{
+	__raw_writel(val, ecc->base + offset);
+}
+
+static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and,
+			       unsigned or)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val &= and;
+	val |= or;
+	edma_write(ecc, offset, val);
+}
+
+static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val &= and;
+	edma_write(ecc, offset, val);
+}
+
+static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val |= or;
+	edma_write(ecc, offset, val);
+}
+
+static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset,
+					   int i)
+{
+	return edma_read(ecc, offset + (i << 2));
+}
+
+static inline void edma_write_array(struct edma_cc *ecc, int offset, int i,
+				    unsigned val)
+{
+	edma_write(ecc, offset + (i << 2), val);
+}
+
+static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i,
+				     unsigned and, unsigned or)
+{
+	edma_modify(ecc, offset + (i << 2), and, or);
+}
+
+static inline void edma_or_array(struct edma_cc *ecc, int offset, int i,
+				 unsigned or)
+{
+	edma_or(ecc, offset + (i << 2), or);
+}
+
+static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j,
+				  unsigned or)
+{
+	edma_or(ecc, offset + ((i * 2 + j) << 2), or);
+}
+
+static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i,
+				     int j, unsigned val)
+{
+	edma_write(ecc, offset + ((i * 2 + j) << 2), val);
+}
+
+static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset)
+{
+	return edma_read(ecc, EDMA_SHADOW0 + offset);
+}
+
+static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc,
+						   int offset, int i)
+{
+	return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2));
+}
+
+static inline void edma_shadow0_write(struct edma_cc *ecc, int offset,
+				      unsigned val)
+{
+	edma_write(ecc, EDMA_SHADOW0 + offset, val);
+}
+
+static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset,
+					    int i, unsigned val)
+{
+	edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val);
+}
+
+static inline unsigned int edma_parm_read(struct edma_cc *ecc, int offset,
+					  int param_no)
+{
+	return edma_read(ecc, EDMA_PARM + offset + (param_no << 5));
+}
+
+static inline void edma_parm_write(struct edma_cc *ecc, int offset,
+				   int param_no, unsigned val)
+{
+	edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val);
+}
+
+static inline void edma_parm_modify(struct edma_cc *ecc, int offset,
+				    int param_no, unsigned and, unsigned or)
+{
+	edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or);
+}
+
+static inline void edma_parm_and(struct edma_cc *ecc, int offset, int param_no,
+				 unsigned and)
+{
+	edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and);
+}
+
+static inline void edma_parm_or(struct edma_cc *ecc, int offset, int param_no,
+				unsigned or)
+{
+	edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or);
+}
+
+static inline void set_bits(int offset, int len, unsigned long *p)
+{
+	for (; len > 0; len--)
+		set_bit(offset + (len - 1), p);
+}
+
+static inline void clear_bits(int offset, int len, unsigned long *p)
+{
+	for (; len > 0; len--)
+		clear_bit(offset + (len - 1), p);
+}
+
+static void edma_map_dmach_to_queue(struct edma_cc *ecc, unsigned ch_no,
+				    enum dma_event_q queue_no)
+{
+	int bit = (ch_no & 0x7) * 4;
+
+	/* default to low priority queue */
+	if (queue_no == EVENTQ_DEFAULT)
+		queue_no = ecc->default_queue;
+
+	queue_no &= 7;
+	edma_modify_array(ecc, EDMA_DMAQNUM, (ch_no >> 3), ~(0x7 << bit),
+			  queue_no << bit);
+}
+
+static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no,
+					  int priority)
+{
+	int bit = queue_no * 4;
+
+	edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit));
+}
+
+static void edma_direct_dmach_to_param_mapping(struct edma_cc *ecc)
+{
+	int i;
+
+	for (i = 0; i < ecc->num_channels; i++)
+		edma_write_array(ecc, EDMA_DCHMAP, i, (i << 5));
+}
+
+/*
+ * bus_for_each_dev() callback: clear the edma_unused bit of every EDMA
+ * channel that @dev requests from the controller passed in @data.
+ * Always returns 0.
+ */
+static int prepare_unused_channel_list(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *dma_pdev;
+	struct of_phandle_args dma_spec;
+	struct edma_cc *ecc = data;
+	int i, count;
+
+	if (dev->of_node) {
+		count = of_property_count_strings(dev->of_node, "dma-names");
+		if (count < 0)
+			return 0;
+		for (i = 0; i < count; i++) {
+			if (of_parse_phandle_with_args(dev->of_node, "dmas",
+						       "#dma-cells", i,
+						       &dma_spec))
+				continue;
+
+			/* device lookup only for EDMA controller nodes */
+			dma_pdev = of_match_node(edma_of_ids, dma_spec.np) ?
+				   of_find_device_by_node(dma_spec.np) : NULL;
+			if (dma_pdev && &dma_pdev->dev == ecc->dev)
+				clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
+					  ecc->edma_unused);
+			/* drop the device and node references on every path */
+			if (dma_pdev)
+				put_device(&dma_pdev->dev);
+			of_node_put(dma_spec.np);
+		}
+		return 0;
+	}
+
+	/* For non-OF case */
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct resource	*res = &pdev->resource[i];
+
+		if ((res->flags & IORESOURCE_DMA) && (int)res->start >= 0)
+			clear_bit(EDMA_CHAN_SLOT(res->start), ecc->edma_unused);
+	}
+
+	return 0;
+}
+
+static void edma_setup_interrupt(struct edma_cc *ecc, unsigned lch,
+	void (*callback)(unsigned channel, u16 ch_status, void *data),
+	void *data)
+{
+	lch = EDMA_CHAN_SLOT(lch);
+
+	if (!callback)
+		edma_shadow0_write_array(ecc, SH_IECR, lch >> 5,
+					 BIT(lch & 0x1f));
+
+	ecc->intr_data[lch].callback = callback;
+	ecc->intr_data[lch].data = data;
+
+	if (callback) {
+		edma_shadow0_write_array(ecc, SH_ICR, lch >> 5,
+					 BIT(lch & 0x1f));
+		edma_shadow0_write_array(ecc, SH_IESR, lch >> 5,
+					 BIT(lch & 0x1f));
+	}
+}
+
+/*
+ * paRAM management functions
+ */
+
+/**
+ * edma_write_slot - write parameter RAM data for slot
+ * @ecc: pointer to edma_cc struct
+ * @slot: number of parameter RAM slot being modified
+ * @param: data to be written into parameter RAM slot
+ *
+ * Use this to assign all parameters of a transfer at once.  This
+ * allows more efficient setup of transfers than issuing multiple
+ * calls to set up those parameters in small pieces, and provides
+ * complete control over all transfer options.
+ */
+static void edma_write_slot(struct edma_cc *ecc, unsigned slot,
+			    const struct edmacc_param *param)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot >= ecc->num_slots)
+		return;
+	memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE);
+}
+
+/**
+ * edma_read_slot - read parameter RAM data from slot
+ * @ecc: pointer to edma_cc struct
+ * @slot: number of parameter RAM slot being copied
+ * @param: where to store copy of parameter RAM data
+ *
+ * Use this to read data from a parameter RAM slot, perhaps to
+ * save them as a template for later reuse.
+ */
+static void edma_read_slot(struct edma_cc *ecc, unsigned slot,
+			   struct edmacc_param *param)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot >= ecc->num_slots)
+		return;
+	memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE);
+}
+
+/**
+ * edma_alloc_slot - allocate DMA parameter RAM
+ * @ecc: pointer to edma_cc struct
+ * @slot: specific slot to allocate; negative for "any unused slot"
+ *
+ * This allocates a parameter RAM slot, initializing it to hold a
+ * dummy transfer.  Slots allocated using this routine have not been
+ * mapped to a hardware DMA channel, and will normally be used by
+ * linking to them from a slot associated with a DMA channel.
+ *
+ * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific
+ * slots may be allocated on behalf of DSP firmware.
+ *
+ * Returns the number of the slot, else negative errno.
+ */
+static int edma_alloc_slot(struct edma_cc *ecc, int slot)
+{
+	if (slot > 0)
+		slot = EDMA_CHAN_SLOT(slot);
+	if (slot < 0) {
+		slot = ecc->num_channels;
+		for (;;) {
+			slot = find_next_zero_bit(ecc->edma_inuse,
+						  ecc->num_slots,
+						  slot);
+			if (slot == ecc->num_slots)
+				return -ENOMEM;
+			if (!test_and_set_bit(slot, ecc->edma_inuse))
+				break;
+		}
+	} else if (slot < ecc->num_channels || slot >= ecc->num_slots) {
+		return -EINVAL;
+	} else if (test_and_set_bit(slot, ecc->edma_inuse)) {
+		return -EBUSY;
+	}
+
+	edma_write_slot(ecc, slot, &dummy_paramset);
+
+	return EDMA_CTLR_CHAN(ecc->id, slot);
+}
+
+/**
+ * edma_free_slot - deallocate DMA parameter RAM
+ * @ecc: pointer to edma_cc struct
+ * @slot: parameter RAM slot returned from edma_alloc_slot()
+ *
+ * This deallocates the parameter RAM slot allocated by edma_alloc_slot().
+ * Callers are responsible for ensuring the slot is inactive, and will
+ * not be activated.
+ */
+static void edma_free_slot(struct edma_cc *ecc, unsigned slot)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot < ecc->num_channels || slot >= ecc->num_slots)
+		return;
+
+	edma_write_slot(ecc, slot, &dummy_paramset);
+	clear_bit(slot, ecc->edma_inuse);
+}
+
+/**
+ * edma_link - link one parameter RAM slot to another
+ * @ecc: pointer to edma_cc struct
+ * @from: parameter RAM slot originating the link
+ * @to: parameter RAM slot which is the link target
+ *
+ * The originating slot should not be part of any active DMA transfer.
+ */
+static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to)
+{
+	from = EDMA_CHAN_SLOT(from);
+	to = EDMA_CHAN_SLOT(to);
+	if (from >= ecc->num_slots || to >= ecc->num_slots)
+		return;
+
+	edma_parm_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000,
+			 PARM_OFFSET(to));
+}
+
+/**
+ * edma_get_position - returns the current transfer point
+ * @ecc: pointer to edma_cc struct
+ * @slot: parameter RAM slot being examined
+ * @dst:  true selects the dest position, false the source
+ *
+ * Returns the position of the current active slot
+ */
+static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot,
+				    bool dst)
+{
+	u32 offs;
+
+	slot = EDMA_CHAN_SLOT(slot);
+	offs = PARM_OFFSET(slot);
+	offs += dst ? PARM_DST : PARM_SRC;
+
+	return edma_read(ecc, offs);
+}
+
+/*-----------------------------------------------------------------------*/
+/**
+ * edma_start - start dma on a channel
+ * @ecc: pointer to edma_cc struct
+ * @channel: channel being activated
+ *
+ * Channels with event associations will be triggered by their hardware
+ * events, and channels without such associations will be triggered by
+ * software.  (At this writing there is no interface for using software
+ * triggers except with channels that don't support hardware triggers.)
+ *
+ * Returns zero on success, else negative errno.
+ */
+static int edma_start(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return -EINVAL;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel < ecc->num_channels) {
+		int j = channel >> 5;
+		unsigned int mask = BIT(channel & 0x1f);
+
+		/* EDMA channels without event association */
+		if (test_bit(channel, ecc->edma_unused)) {
+			pr_debug("EDMA: ESR%d %08x\n", j,
+				 edma_shadow0_read_array(ecc, SH_ESR, j));
+			edma_shadow0_write_array(ecc, SH_ESR, j, mask);
+			return 0;
+		}
+
+		/* EDMA channel with event association */
+		pr_debug("EDMA: ER%d %08x\n", j,
+			 edma_shadow0_read_array(ecc, SH_ER, j));
+		/* Clear any pending event or error */
+		edma_write_array(ecc, EDMA_ECR, j, mask);
+		edma_write_array(ecc, EDMA_EMCR, j, mask);
+		/* Clear any SER */
+		edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+		edma_shadow0_write_array(ecc, SH_EESR, j, mask);
+		pr_debug("EDMA: EER%d %08x\n", j,
+			 edma_shadow0_read_array(ecc, SH_EER, j));
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * edma_stop - stops dma on the channel passed
+ * @ecc: pointer to edma_cc struct
+ * @channel: channel being deactivated
+ *
+ * When @channel is a valid channel number, any active transfer is paused
+ * and all pending hardware events are cleared.  The current transfer
+ * may not be resumed, and the channel's Parameter RAM should be
+ * reinitialized before being reused.
+ */
+static void edma_stop(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel < ecc->num_channels) {
+		int j = channel >> 5;
+		unsigned int mask = BIT(channel & 0x1f);
+
+		edma_shadow0_write_array(ecc, SH_EECR, j, mask);
+		edma_shadow0_write_array(ecc, SH_ECR, j, mask);
+		edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+		edma_write_array(ecc, EDMA_EMCR, j, mask);
+
+		/* clear possibly pending completion interrupt */
+		edma_shadow0_write_array(ecc, SH_ICR, j, mask);
+
+		pr_debug("EDMA: EER%d %08x\n", j,
+			 edma_shadow0_read_array(ecc, SH_EER, j));
+
+		/* REVISIT:  consider guarding against inappropriate event
+		 * chaining by overwriting with dummy_paramset.
+		 */
+	}
+}
+
+/**
+ * edma_pause - pause dma on a channel
+ * @ecc: pointer to edma_cc struct
+ * @channel: on which edma_start() has been called
+ *
+ * This temporarily disables EDMA hardware events on the specified channel,
+ * preventing them from triggering new transfers on its behalf
+ */
+static void edma_pause(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel < ecc->num_channels) {
+		unsigned int mask = BIT(channel & 0x1f);
+
+		edma_shadow0_write_array(ecc, SH_EECR, channel >> 5, mask);
+	}
+}
+
+/**
+ * edma_resume - resumes dma on a paused channel
+ * @ecc: pointer to edma_cc struct
+ * @channel: on which edma_pause() has been called
+ *
+ * This re-enables EDMA hardware events on the specified channel.
+ */
+static void edma_resume(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel < ecc->num_channels) {
+		unsigned int mask = BIT(channel & 0x1f);
+
+		edma_shadow0_write_array(ecc, SH_EESR, channel >> 5, mask);
+	}
+}
+
+static int edma_trigger_channel(struct edma_cc *ecc, unsigned channel)
+{
+	unsigned int mask;
+
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return -EINVAL;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+	mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask);
+
+	pr_debug("EDMA: ESR%d %08x\n", (channel >> 5),
+		 edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5)));
+	return 0;
+}
+
+/******************************************************************************
+ *
+ * Clean the ParamEntry and bring the EDMA back to its initial state if media
+ * has been removed before EDMA has finished. It is useful for removable media.
+ * Arguments:
+ *      ecc       - pointer to edma_cc struct
+ *      channel   - channel no
+ *
+ * Return: nothing (the function is void)
+ *
+ * FIXME this should not be needed ... edma_stop() should suffice.
+ *****************************************************************************/
+
+static void edma_clean_channel(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel < ecc->num_channels) {
+		int j = (channel >> 5);
+		unsigned int mask = BIT(channel & 0x1f);
+
+		pr_debug("EDMA: EMR%d %08x\n", j,
+			 edma_read_array(ecc, EDMA_EMR, j));
+		edma_shadow0_write_array(ecc, SH_ECR, j, mask);
+		/* Clear the corresponding EMR bits */
+		edma_write_array(ecc, EDMA_EMCR, j, mask);
+		/* Clear any SER */
+		edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+		edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0));
+	}
+}
+
+/**
+ * edma_alloc_channel - allocate DMA channel and paired parameter RAM
+ * @ecc: pointer to edma_cc struct
+ * @channel: specific channel to allocate; negative for "any unmapped channel"
+ * @callback: optional; to be issued on DMA completion or errors
+ * @data: passed to callback
+ * @eventq_no: an EVENTQ_* constant, used to choose which Transfer
+ *	Controller (TC) executes requests using this channel.  Use
+ *	EVENTQ_DEFAULT unless you really need a high priority queue.
+ *
+ * This allocates a DMA channel and its associated parameter RAM slot.
+ * The parameter RAM is initialized to hold a dummy transfer.
+ *
+ * Normal use is to pass a specific channel number as @channel, to make
+ * use of hardware events mapped to that channel.  When the channel will
+ * be used only for software triggering or event chaining, channels not
+ * mapped to hardware events (or mapped to unused events) are preferable.
+ *
+ * DMA transfers start from a channel using edma_start(), or by
+ * chaining.  When the transfer described in that channel's parameter RAM
+ * slot completes, that slot's data may be reloaded through a link.
+ *
+ * DMA errors are only reported to the @callback associated with the
+ * channel driving that transfer, but transfer completion callbacks can
+ * be sent to another channel under control of the TCC field in
+ * the option word of the transfer's parameter RAM set.  Drivers must not
+ * use DMA transfer completion callbacks for channels they did not allocate.
+ * (The same applies to TCC codes used in transfer chaining.)
+ *
+ * Returns the number of the channel, else negative errno.
+ */
+static int edma_alloc_channel(struct edma_cc *ecc, int channel,
+		void (*callback)(unsigned channel, u16 ch_status, void *data),
+		void *data,
+		enum dma_event_q eventq_no)
+{
+	unsigned done = 0;
+	int ret = 0;
+
+	if (!ecc->unused_chan_list_done) {
+		/*
+		 * Scan all the platform devices to find out the EDMA channels
+		 * used and clear them in the unused list, making the rest
+		 * available for ARM usage.
+		 */
+		ret = bus_for_each_dev(&platform_bus_type, NULL, ecc,
+				       prepare_unused_channel_list);
+		if (ret < 0)
+			return ret;
+
+		ecc->unused_chan_list_done = true;
+	}
+
+	if (channel >= 0) {
+		if (ecc->id != EDMA_CTLR(channel)) {
+			dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n",
+				__func__, ecc->id, EDMA_CTLR(channel));
+			return -EINVAL;
+		}
+		channel = EDMA_CHAN_SLOT(channel);
+	}
+
+	if (channel < 0) {
+		channel = 0;
+		for (;;) {
+			channel = find_next_bit(ecc->edma_unused,
+						ecc->num_channels, channel);
+			if (channel == ecc->num_channels)
+				break;
+			if (!test_and_set_bit(channel, ecc->edma_inuse)) {
+				done = 1;
+				break;
+			}
+			channel++;
+		}
+		if (!done)
+			return -ENOMEM;
+	} else if (channel >= ecc->num_channels) {
+		return -EINVAL;
+	} else if (test_and_set_bit(channel, ecc->edma_inuse)) {
+		return -EBUSY;
+	}
+
+	/* ensure access through shadow region 0 */
+	edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
+
+	/* ensure no events are pending */
+	edma_stop(ecc, EDMA_CTLR_CHAN(ecc->id, channel));
+	edma_write_slot(ecc, channel, &dummy_paramset);
+
+	if (callback)
+		edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, channel),
+				     callback, data);
+
+	edma_map_dmach_to_queue(ecc, channel, eventq_no);
+
+	return EDMA_CTLR_CHAN(ecc->id, channel);
+}
+
+/**
+ * edma_free_channel - deallocate DMA channel
+ * @ecc: pointer to edma_cc struct
+ * @channel: dma channel returned from edma_alloc_channel()
+ *
+ * This deallocates the DMA channel and associated parameter RAM slot
+ * allocated by edma_alloc_channel().
+ *
+ * Callers are responsible for ensuring the channel is inactive, and
+ * will not be reactivated by linking, chaining, or software calls to
+ * edma_start().
+ */
+static void edma_free_channel(struct edma_cc *ecc, unsigned channel)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel >= ecc->num_channels)
+		return;
+
+	edma_setup_interrupt(ecc, channel, NULL, NULL);
+	/* REVISIT should probably take out of shadow region 0 */
+
+	memcpy_toio(ecc->base + PARM_OFFSET(channel), &dummy_paramset,
+		    PARM_SIZE);
+	clear_bit(channel, ecc->edma_inuse);
+}
+
+/*
+ * edma_assign_channel_eventq - move given channel to desired eventq
+ * Arguments:
+ *	channel - channel number
+ *	eventq_no - queue to move the channel to
+ *
+ * Can be used to move a channel to a selected event queue.
+ */
+static void edma_assign_channel_eventq(struct edma_cc *ecc, unsigned channel,
+				       enum dma_event_q eventq_no)
+{
+	if (ecc->id != EDMA_CTLR(channel)) {
+		dev_err(ecc->dev, "%s: ID mismatch for eDMA%d: %d\n", __func__,
+			ecc->id, EDMA_CTLR(channel));
+		return;
+	}
+	channel = EDMA_CHAN_SLOT(channel);
+
+	if (channel >= ecc->num_channels)
+		return;
+
+	/* default to low priority queue */
+	if (eventq_no == EVENTQ_DEFAULT)
+		eventq_no = ecc->default_queue;
+	if (eventq_no >= ecc->num_tc)
+		return;
+
+	edma_map_dmach_to_queue(ecc, channel, eventq_no);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *data)
+{
+	struct edma_cc *ecc = data;
+	int ctlr;
+	u32 sh_ier;
+	u32 sh_ipr;
+	u32 bank;
+
+	ctlr = ecc->id;
+	if (ctlr < 0)
+		return IRQ_NONE;
+
+	dev_dbg(ecc->dev, "dma_irq_handler\n");
+
+	sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0);
+	if (!sh_ipr) {
+		sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1);
+		if (!sh_ipr)
+			return IRQ_NONE;
+		sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1);
+		bank = 1;
+	} else {
+		sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0);
+		bank = 0;
+	}
+
+	do {
+		u32 slot;
+		u32 channel;
+
+		dev_dbg(ecc->dev, "IPR%d %08x\n", bank, sh_ipr);
+
+		slot = __ffs(sh_ipr);
+		sh_ipr &= ~(BIT(slot));
+
+		if (sh_ier & BIT(slot)) {
+			channel = (bank << 5) | slot;
+			/* Clear the corresponding IPR bits */
+			edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot));
+			if (ecc->intr_data[channel].callback)
+				ecc->intr_data[channel].callback(
+						EDMA_CTLR_CHAN(ctlr, channel),
+						EDMA_DMA_COMPLETE,
+						ecc->intr_data[channel].data);
+		}
+	} while (sh_ipr);
+
+	edma_shadow0_write(ecc, SH_IEVAL, 1);
+	return IRQ_HANDLED;
+}
+
+/******************************************************************************
+ *
+ * DMA error interrupt handler
+ *
+ *****************************************************************************/
+static irqreturn_t dma_ccerr_handler(int irq, void *data)
+{
+	struct edma_cc *ecc = data;
+	int i;
+	int ctlr;
+	unsigned int cnt = 0;
+
+	ctlr = ecc->id;
+	if (ctlr < 0)
+		return IRQ_NONE;
+
+	dev_dbg(ecc->dev, "dma_ccerr_handler\n");
+
+	if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) &&
+	    (edma_read_array(ecc, EDMA_EMR, 1) == 0) &&
+	    (edma_read(ecc, EDMA_QEMR) == 0) &&
+	    (edma_read(ecc, EDMA_CCERR) == 0))
+		return IRQ_NONE;
+
+	while (1) {
+		int j = -1;
+
+		if (edma_read_array(ecc, EDMA_EMR, 0))
+			j = 0;
+		else if (edma_read_array(ecc, EDMA_EMR, 1))
+			j = 1;
+		if (j >= 0) {
+			dev_dbg(ecc->dev, "EMR%d %08x\n", j,
+				edma_read_array(ecc, EDMA_EMR, j));
+			for (i = 0; i < 32; i++) {
+				int k = (j << 5) + i;
+
+				if (edma_read_array(ecc, EDMA_EMR, j) &
+							BIT(i)) {
+					/* Clear the corresponding EMR bits */
+					edma_write_array(ecc, EDMA_EMCR, j,
+							 BIT(i));
+					/* Clear any SER */
+					edma_shadow0_write_array(ecc, SH_SECR,
+								 j, BIT(i));
+					if (ecc->intr_data[k].callback) {
+						ecc->intr_data[k].callback(
+							EDMA_CTLR_CHAN(ctlr, k),
+							EDMA_DMA_CC_ERROR,
+							ecc->intr_data[k].data);
+					}
+				}
+			}
+		} else if (edma_read(ecc, EDMA_QEMR)) {
+			dev_dbg(ecc->dev, "QEMR %02x\n",
+				edma_read(ecc, EDMA_QEMR));
+			for (i = 0; i < 8; i++) {
+				if (edma_read(ecc, EDMA_QEMR) & BIT(i)) {
+					/* Clear the corresponding QEMR bits */
+					edma_write(ecc, EDMA_QEMCR, BIT(i));
+					edma_shadow0_write(ecc, SH_QSECR,
+							   BIT(i));
+
+					/* NOTE:  not reported!! */
+				}
+			}
+		} else if (edma_read(ecc, EDMA_CCERR)) {
+			dev_dbg(ecc->dev, "CCERR %08x\n",
+				edma_read(ecc, EDMA_CCERR));
+			/* FIXME:  CCERR.BIT(16) ignored!  much better
+			 * to just write CCERRCLR with CCERR value...
+			 */
+			for (i = 0; i < 8; i++) {
+				if (edma_read(ecc, EDMA_CCERR) & BIT(i)) {
+					/* Clear the corresponding CCERR bits */
+					edma_write(ecc, EDMA_CCERRCLR, BIT(i));
+
+					/* NOTE:  not reported!! */
+				}
+			}
+		}
+		if ((edma_read_array(ecc, EDMA_EMR, 0) == 0) &&
+		    (edma_read_array(ecc, EDMA_EMR, 1) == 0) &&
+		    (edma_read(ecc, EDMA_QEMR) == 0) &&
+		    (edma_read(ecc, EDMA_CCERR) == 0))
+			break;
+		cnt++;
+		if (cnt > 10)
+			break;
+	}
+	edma_write(ecc, EDMA_EEVAL, 1);
+	return IRQ_HANDLED;
+}
+
 static inline struct edma_cc *to_edma_cc(struct dma_device *d)
 {
 	return container_of(d, struct edma_cc, dma_slave);
@@ -137,8 +1173,7 @@ static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
 	return container_of(c, struct edma_chan, vchan.chan);
 }
 
-static inline struct edma_desc
-*to_edma_desc(struct dma_async_tx_descriptor *tx)
+static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx)
 {
 	return container_of(tx, struct edma_desc, vdesc.tx);
 }
@@ -151,7 +1186,7 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
 /* Dispatch a queued descriptor to the controller (caller holds lock) */
 static void edma_execute(struct edma_chan *echan)
 {
-	struct edma *cc = echan->ecc->cc;
+	struct edma_cc *ecc = echan->ecc;
 	struct virt_dma_desc *vdesc;
 	struct edma_desc *edesc;
 	struct device *dev = echan->vchan.chan.device->dev;
@@ -176,7 +1211,7 @@ static void edma_execute(struct edma_chan *echan)
 	/* Write descriptor PaRAM set(s) */
 	for (i = 0; i < nslots; i++) {
 		j = i + edesc->processed;
-		edma_write_slot(cc, echan->slot[i], &edesc->pset[j].param);
+		edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param);
 		edesc->sg_len += edesc->pset[j].len;
 		dev_vdbg(echan->vchan.chan.device->dev,
 			"\n pset[%d]:\n"
@@ -201,7 +1236,7 @@ static void edma_execute(struct edma_chan *echan)
 			edesc->pset[j].param.link_bcntrld);
 		/* Link to the previous slot if not the last set */
 		if (i != (nslots - 1))
-			edma_link(cc, echan->slot[i], echan->slot[i+1]);
+			edma_link(ecc, echan->slot[i], echan->slot[i + 1]);
 	}
 
 	edesc->processed += nslots;
@@ -213,9 +1248,9 @@ static void edma_execute(struct edma_chan *echan)
 	 */
 	if (edesc->processed == edesc->pset_nr) {
 		if (edesc->cyclic)
-			edma_link(cc, echan->slot[nslots-1], echan->slot[1]);
+			edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]);
 		else
-			edma_link(cc, echan->slot[nslots-1],
+			edma_link(ecc, echan->slot[nslots - 1],
 				  echan->ecc->dummy_slot);
 	}
 
@@ -226,19 +1261,19 @@ static void edma_execute(struct edma_chan *echan)
 		 * transfers of MAX_NR_SG
 		 */
 		dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
-		edma_clean_channel(cc, echan->ch_num);
-		edma_stop(cc, echan->ch_num);
-		edma_start(cc, echan->ch_num);
-		edma_trigger_channel(cc, echan->ch_num);
+		edma_clean_channel(ecc, echan->ch_num);
+		edma_stop(ecc, echan->ch_num);
+		edma_start(ecc, echan->ch_num);
+		edma_trigger_channel(ecc, echan->ch_num);
 		echan->missed = 0;
 	} else if (edesc->processed <= MAX_NR_SG) {
 		dev_dbg(dev, "first transfer starting on channel %d\n",
 			echan->ch_num);
-		edma_start(cc, echan->ch_num);
+		edma_start(ecc, echan->ch_num);
 	} else {
 		dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
 			echan->ch_num, edesc->processed);
-		edma_resume(cc, echan->ch_num);
+		edma_resume(ecc, echan->ch_num);
 	}
 }
 
@@ -256,11 +1291,10 @@ static int edma_terminate_all(struct dma_chan *chan)
 	 * echan->edesc is NULL and exit.)
 	 */
 	if (echan->edesc) {
-		edma_stop(echan->ecc->cc, echan->ch_num);
+		edma_stop(echan->ecc, echan->ch_num);
 		/* Move the cyclic channel back to default queue */
 		if (echan->edesc->cyclic)
-			edma_assign_channel_eventq(echan->ecc->cc,
-						   echan->ch_num,
+			edma_assign_channel_eventq(echan->ecc, echan->ch_num,
 						   EVENTQ_DEFAULT);
 		/*
 		 * free the running request descriptor
@@ -298,7 +1332,7 @@ static int edma_dma_pause(struct dma_chan *chan)
 	if (!echan->edesc)
 		return -EINVAL;
 
-	edma_pause(echan->ecc->cc, echan->ch_num);
+	edma_pause(echan->ecc, echan->ch_num);
 	return 0;
 }
 
@@ -306,7 +1340,7 @@ static int edma_dma_resume(struct dma_chan *chan)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 
-	edma_resume(echan->ecc->cc, echan->ch_num);
+	edma_resume(echan->ecc, echan->ch_num);
 	return 0;
 }
 
@@ -322,9 +1356,10 @@ static int edma_dma_resume(struct dma_chan *chan)
  * @direction: Direction of the transfer
  */
 static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
-	dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
-	enum dma_slave_buswidth dev_width, unsigned int dma_length,
-	enum dma_transfer_direction direction)
+			    dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+			    enum dma_slave_buswidth dev_width,
+			    unsigned int dma_length,
+			    enum dma_transfer_direction direction)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
@@ -470,8 +1505,8 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 		return NULL;
 	}
 
-	edesc = kzalloc(sizeof(*edesc) + sg_len *
-		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	edesc = kzalloc(sizeof(*edesc) + sg_len * sizeof(edesc->pset[0]),
+			GFP_ATOMIC);
 	if (!edesc) {
 		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
 		return NULL;
@@ -488,7 +1523,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 	for (i = 0; i < nslots; i++) {
 		if (echan->slot[i] < 0) {
 			echan->slot[i] =
-				edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY);
+				edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "%s: Failed to allocate slot\n",
@@ -623,8 +1658,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	if (nslots > MAX_NR_SG)
 		return NULL;
 
-	edesc = kzalloc(sizeof(*edesc) + nslots *
-		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
+			GFP_ATOMIC);
 	if (!edesc) {
 		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
 		return NULL;
@@ -643,7 +1678,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 		/* Allocate a PaRAM slot, if needed */
 		if (echan->slot[i] < 0) {
 			echan->slot[i] =
-				edma_alloc_slot(echan->ecc->cc, EDMA_SLOT_ANY);
+				edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "%s: Failed to allocate slot\n",
@@ -704,7 +1739,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	}
 
 	/* Place the cyclic channel to highest priority queue */
-	edma_assign_channel_eventq(echan->ecc->cc, echan->ch_num, EVENTQ_0);
+	edma_assign_channel_eventq(echan->ecc, echan->ch_num, EVENTQ_0);
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
 }
@@ -712,7 +1747,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 {
 	struct edma_chan *echan = data;
-	struct edma *cc = echan->ecc->cc;
+	struct edma_cc *ecc = echan->ecc;
 	struct device *dev = echan->vchan.chan.device->dev;
 	struct edma_desc *edesc;
 	struct edmacc_param p;
@@ -727,15 +1762,19 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 				vchan_cyclic_callback(&edesc->vdesc);
 				goto out;
 			} else if (edesc->processed == edesc->pset_nr) {
-				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
+				dev_dbg(dev,
+					"Transfer completed on channel %d\n",
+					ch_num);
 				edesc->residue = 0;
-				edma_stop(cc, echan->ch_num);
+				edma_stop(ecc, echan->ch_num);
 				vchan_cookie_complete(&edesc->vdesc);
 				echan->edesc = NULL;
 			} else {
-				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+				dev_dbg(dev,
+					"Sub transfer completed on channel %d\n",
+					ch_num);
 
-				edma_pause(cc, echan->ch_num);
+				edma_pause(ecc, echan->ch_num);
 
 				/* Update statistics for tx_status */
 				edesc->residue -= edesc->sg_len;
@@ -746,7 +1785,7 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 		}
 		break;
 	case EDMA_DMA_CC_ERROR:
-		edma_read_slot(cc, echan->slot[0], &p);
+		edma_read_slot(ecc, echan->slot[0], &p);
 
 		/*
 		 * Issue later based on missed flag which will be sure
@@ -761,18 +1800,18 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 		 * slot. So we avoid doing so and set the missed flag.
 		 */
 		if (p.a_b_cnt == 0 && p.ccnt == 0) {
-			dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n");
+			dev_dbg(dev, "Error on null slot, setting miss\n");
 			echan->missed = 1;
 		} else {
 			/*
 			 * The slot is already programmed but the event got
 			 * missed, so its safe to issue it here.
 			 */
-			dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n");
-			edma_clean_channel(cc, echan->ch_num);
-			edma_stop(cc, echan->ch_num);
-			edma_start(cc, echan->ch_num);
-			edma_trigger_channel(cc, echan->ch_num);
+			dev_dbg(dev, "Missed event, TRIGGERING\n");
+			edma_clean_channel(ecc, echan->ch_num);
+			edma_stop(ecc, echan->ch_num);
+			edma_start(ecc, echan->ch_num);
+			edma_trigger_channel(ecc, echan->ch_num);
 		}
 		break;
 	default:
@@ -791,7 +1830,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
 	int a_ch_num;
 	LIST_HEAD(descs);
 
-	a_ch_num = edma_alloc_channel(echan->ecc->cc, echan->ch_num,
+	a_ch_num = edma_alloc_channel(echan->ecc, echan->ch_num,
 				      edma_callback, echan, EVENTQ_DEFAULT);
 
 	if (a_ch_num < 0) {
@@ -816,7 +1855,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
 	return 0;
 
 err_wrong_chan:
-	edma_free_channel(echan->ecc->cc, a_ch_num);
+	edma_free_channel(echan->ecc, a_ch_num);
 err_no_chan:
 	return ret;
 }
@@ -829,21 +1868,21 @@ static void edma_free_chan_resources(struct dma_chan *chan)
 	int i;
 
 	/* Terminate transfers */
-	edma_stop(echan->ecc->cc, echan->ch_num);
+	edma_stop(echan->ecc, echan->ch_num);
 
 	vchan_free_chan_resources(&echan->vchan);
 
 	/* Free EDMA PaRAM slots */
 	for (i = 1; i < EDMA_MAX_SLOTS; i++) {
 		if (echan->slot[i] >= 0) {
-			edma_free_slot(echan->ecc->cc, echan->slot[i]);
+			edma_free_slot(echan->ecc, echan->slot[i]);
 			echan->slot[i] = -1;
 		}
 	}
 
 	/* Free EDMA channel */
 	if (echan->alloced) {
-		edma_free_channel(echan->ecc->cc, echan->ch_num);
+		edma_free_channel(echan->ecc, echan->ch_num);
 		echan->alloced = false;
 	}
 
@@ -873,8 +1912,7 @@ static u32 edma_residue(struct edma_desc *edesc)
 	 * We always read the dst/src position from the first RamPar
 	 * pset. That's the one which is active now.
 	 */
-	pos = edma_get_position(edesc->echan->ecc->cc, edesc->echan->slot[0],
-				dst);
+	pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst);
 
 	/*
 	 * Cyclic is simple. Just subtract pset[0].addr from pos.
@@ -935,15 +1973,14 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
 	return ret;
 }
 
-static void __init edma_chan_init(struct edma_cc *ecc,
-				  struct dma_device *dma,
+static void __init edma_chan_init(struct edma_cc *ecc, struct dma_device *dma,
 				  struct edma_chan *echans)
 {
 	int i, j;
 
 	for (i = 0; i < EDMA_CHANS; i++) {
 		struct edma_chan *echan = &echans[i];
-		echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i);
+		echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
 		echan->ecc = ecc;
 		echan->vchan.desc_free = edma_desc_free;
 
@@ -991,14 +2028,189 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
 	INIT_LIST_HEAD(&dma->channels);
 }
 
+/*
+ * Fill ecc from CCCFG and, if pdata has no queue priority map, build a
+ * default one. Returns 0 on success or -ENOMEM if that allocation fails.
+ */
+static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
+			      struct edma_cc *ecc)
+{
+	int i;
+	u32 value, cccfg;
+	s8 (*queue_priority_map)[2];
+
+	/* Decode the eDMA3 configuration from CCCFG register */
+	cccfg = edma_read(ecc, EDMA_CCCFG);
+
+	value = GET_NUM_REGN(cccfg);
+	ecc->num_region = BIT(value);
+
+	value = GET_NUM_DMACH(cccfg);
+	ecc->num_channels = BIT(value + 1);
+
+	value = GET_NUM_PAENTRY(cccfg);
+	ecc->num_slots = BIT(value + 4);
+
+	value = GET_NUM_EVQUE(cccfg);
+	ecc->num_tc = value + 1;
+
+	dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg);
+	dev_dbg(dev, "num_region: %u\n", ecc->num_region);
+	dev_dbg(dev, "num_channels: %u\n", ecc->num_channels);
+	dev_dbg(dev, "num_slots: %u\n", ecc->num_slots);
+	dev_dbg(dev, "num_tc: %u\n", ecc->num_tc);
+
+	/* Nothing needs to be done if queue priority is provided */
+	if (pdata->queue_priority_mapping)
+		return 0;
+
+	/*
+	 * Default TC/queue priorities: Q0 - priority 0, Q1 - priority 1, ...
+	 * (0 is the highest priority, 7 the lowest), so the last queue has
+	 * the lowest priority. The map is an array of {queue, priority} pairs
+	 * ended by a {-1, -1} pair: num_tc + 1 pairs, not num_tc + 1 bytes.
+	 */
+	queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1,
+				sizeof(*queue_priority_map), GFP_KERNEL);
+	if (!queue_priority_map)
+		return -ENOMEM;
+
+	for (i = 0; i < ecc->num_tc; i++) {
+		queue_priority_map[i][0] = i;
+		queue_priority_map[i][1] = i;
+	}
+	queue_priority_map[i][0] = -1;
+	queue_priority_map[i][1] = -1;
+
+	pdata->queue_priority_mapping = queue_priority_map;
+	/* Default queue has the lowest priority */
+	pdata->default_queue = i - 1;
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_OF)
+static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata,
+			       size_t sz)
+{
+	const char pname[] = "ti,edma-xbar-event-map";
+	struct resource res;
+	void __iomem *xbar;
+	s16 (*xbar_chans)[2];
+	size_t nelm = sz / sizeof(s16);
+	u32 shift, offset, mux;
+	int ret, i;
+	/* Room for nelm s16 cells of the property plus one {-1, -1} end pair */
+	xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL);
+	if (!xbar_chans)
+		return -ENOMEM;
+	/* Address resource 1 of the node is mapped as the xbar registers */
+	ret = of_address_to_resource(dev->of_node, 1, &res);
+	if (ret)
+		return -ENOMEM;
+
+	xbar = devm_ioremap(dev, res.start, resource_size(&res));
+	if (!xbar)
+		return -ENOMEM;
+
+	ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans,
+					 nelm);
+	if (ret)
+		return -EIO;
+
+	/* nelm now counts pairs; end the map with {-1, -1} for other users */
+	nelm >>= 1;
+	xbar_chans[nelm][0] = -1;
+	xbar_chans[nelm][1] = -1;
+	/* Write [0] into the byte of the 32-bit xbar word that [1] selects */
+	for (i = 0; i < nelm; i++) {
+		shift = (xbar_chans[i][1] & 0x03) << 3;
+		offset = xbar_chans[i][1] & 0xfffffffc;
+		mux = readl(xbar + offset);
+		mux &= ~(0xff << shift);
+		mux |= xbar_chans[i][0] << shift;
+		writel(mux, (xbar + offset));
+	}
+
+	pdata->xbar_chans = (const s16 (*)[2]) xbar_chans;
+	return 0;
+}
+
+static int edma_of_parse_dt(struct device *dev, struct edma_soc_info *pdata)
+{
+	int ret = 0;
+	struct property *prop;
+	size_t sz;
+	struct edma_rsv_info *rsv_info;
+
+	rsv_info = devm_kzalloc(dev, sizeof(struct edma_rsv_info), GFP_KERNEL);
+	if (!rsv_info)
+		return -ENOMEM;
+	pdata->rsv = rsv_info;
+	/* The xbar event map is optional; ret stays 0 when it is absent */
+	prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", &sz);
+	if (prop)
+		ret = edma_xbar_event_map(dev, pdata, sz);
+
+	return ret;
+}
+
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev)
+{
+	struct edma_soc_info *info;
+	int ret;
+
+	info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	ret = edma_of_parse_dt(dev, info);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return info;
+}
+#else
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif
+
 static int edma_probe(struct platform_device *pdev)
 {
-	struct edma_cc *ecc;
-	struct device_node *parent_node = pdev->dev.parent->of_node;
-	struct platform_device *parent_pdev =
-					to_platform_device(pdev->dev.parent);
+	struct edma_soc_info	*info = pdev->dev.platform_data;
+	s8			(*queue_priority_mapping)[2];
+	int			i, off, ln;
+	const s16		(*rsv_chans)[2];
+	const s16		(*rsv_slots)[2];
+	const s16		(*xbar_chans)[2];
+	int			irq;
+	char			*irq_name;
+	struct resource		*mem;
+	struct device_node	*node = pdev->dev.of_node;
+	struct device		*dev = &pdev->dev;
+	struct edma_cc		*ecc;
 	int ret;
 
+	if (node) {
+		info = edma_setup_info_from_dt(dev);
+		if (IS_ERR(info)) {
+			dev_err(dev, "failed to get DT data\n");
+			return PTR_ERR(info);
+		}
+	}
+
+	if (!info)
+		return -ENODEV;
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -1009,15 +2221,123 @@ static int edma_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	ecc->cc = edma_get_data(pdev->dev.parent);
-	if (!ecc->cc)
-		return -ENODEV;
+	ecc->dev = dev;
+	ecc->id = pdev->id;
+	/* When booting with DT the pdev->id is -1 */
+	if (ecc->id < 0)
+		ecc->id = 0;
+
+	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc");
+	if (!mem) {
+		dev_dbg(dev, "mem resource not found, using index 0\n");
+		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!mem) {
+			dev_err(dev, "no mem resource?\n");
+			return -ENODEV;
+		}
+	}
+	ecc->base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ecc->base))
+		return PTR_ERR(ecc->base);
+
+	platform_set_drvdata(pdev, ecc);
+
+	/* Get eDMA3 configuration from IP */
+	ret = edma_setup_from_hw(dev, info, ecc);
+	if (ret)
+		return ret;
+
+	ecc->default_queue = info->default_queue;
+
+	for (i = 0; i < ecc->num_slots; i++)
+		edma_write_slot(ecc, i, &dummy_paramset);
+
+	/* Mark all channels as unused */
+	memset(ecc->edma_unused, 0xff, sizeof(ecc->edma_unused));
+
+	if (info->rsv) {
+		/* Clear the reserved channels in unused list */
+		rsv_chans = info->rsv->rsv_chans;
+		if (rsv_chans) {
+			for (i = 0; rsv_chans[i][0] != -1; i++) {
+				off = rsv_chans[i][0];
+				ln = rsv_chans[i][1];
+				clear_bits(off, ln, ecc->edma_unused);
+			}
+		}
+
+		/* Set the reserved slots in inuse list */
+		rsv_slots = info->rsv->rsv_slots;
+		if (rsv_slots) {
+			for (i = 0; rsv_slots[i][0] != -1; i++) {
+				off = rsv_slots[i][0];
+				ln = rsv_slots[i][1];
+				set_bits(off, ln, ecc->edma_inuse);
+			}
+		}
+	}
+
+	/* Clear the xbar mapped channels in unused list */
+	xbar_chans = info->xbar_chans;
+	if (xbar_chans) {
+		for (i = 0; xbar_chans[i][1] != -1; i++) {
+			off = xbar_chans[i][1];
+			clear_bits(off, 1, ecc->edma_unused);
+		}
+	}
+
+	irq = platform_get_irq_byname(pdev, "edma3_ccint");
+	if (irq < 0 && node)
+		irq = irq_of_parse_and_map(node, 0);
+
+	if (irq >= 0) {
+		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
+					  dev_name(dev));
+		ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
+				       ecc);
+		if (ret) {
+			dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
+			return ret;
+		}
+	}
+
+	irq = platform_get_irq_byname(pdev, "edma3_ccerrint");
+	if (irq < 0 && node)
+		irq = irq_of_parse_and_map(node, 2);
+
+	if (irq >= 0) {
+		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
+					  dev_name(dev));
+		ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
+				       ecc);
+		if (ret) {
+			dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < ecc->num_channels; i++)
+		edma_map_dmach_to_queue(ecc, i, info->default_queue);
+
+	queue_priority_mapping = info->queue_priority_mapping;
+
+	/* Event queue priority mapping */
+	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+		edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+					      queue_priority_mapping[i][1]);
 
-	ecc->ctlr = parent_pdev->id;
-	if (ecc->ctlr < 0)
-		ecc->ctlr = 0;
+	/* Map the channel to param entry if channel mapping logic exist */
+	if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST)
+		edma_direct_dmach_to_param_mapping(ecc);
 
-	ecc->dummy_slot = edma_alloc_slot(ecc->cc, EDMA_SLOT_ANY);
+	for (i = 0; i < ecc->num_region; i++) {
+		edma_write_array2(ecc, EDMA_DRAE, i, 0, 0x0);
+		edma_write_array2(ecc, EDMA_DRAE, i, 1, 0x0);
+		edma_write_array(ecc, EDMA_QRAE, i, 0x0);
+	}
+	ecc->info = info;
+
+	ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
 	if (ecc->dummy_slot < 0) {
 		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
 		return ecc->dummy_slot;
@@ -1036,19 +2356,16 @@ static int edma_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_reg1;
 
-	platform_set_drvdata(pdev, ecc);
-
-	if (parent_node) {
-		of_dma_controller_register(parent_node, of_dma_xlate_by_chan_id,
+	if (node)
+		of_dma_controller_register(node, of_dma_xlate_by_chan_id,
 					   &ecc->dma_slave);
-	}
 
 	dev_info(&pdev->dev, "TI EDMA DMA engine driver\n");
 
 	return 0;
 
 err_reg1:
-	edma_free_slot(ecc->cc, ecc->dummy_slot);
+	edma_free_slot(ecc, ecc->dummy_slot);
 	return ret;
 }
 
@@ -1056,21 +2373,60 @@ static int edma_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct edma_cc *ecc = dev_get_drvdata(dev);
-	struct device_node *parent_node = pdev->dev.parent->of_node;
 
-	if (parent_node)
-		of_dma_controller_free(parent_node);
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&ecc->dma_slave);
-	edma_free_slot(ecc->cc, ecc->dummy_slot);
+	edma_free_slot(ecc, ecc->dummy_slot);
 
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int edma_pm_resume(struct device *dev)
+{
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+	int i;
+	s8 (*queue_priority_mapping)[2];
+
+	queue_priority_mapping = ecc->info->queue_priority_mapping;
+
+	/* Reprogram event queue priorities; the map ends at a -1 queue */
+	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+		edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+					      queue_priority_mapping[i][1]);
+
+	/* Map the channel to param entry if channel mapping logic exists */
+	if (edma_read(ecc, EDMA_CCCFG) & CHMAP_EXIST)
+		edma_direct_dmach_to_param_mapping(ecc);
+	/* Redo region access and interrupt setup for every channel in use */
+	for (i = 0; i < ecc->num_channels; i++) {
+		if (test_bit(i, ecc->edma_inuse)) {
+			/* ensure access through shadow region 0 */
+			edma_or_array2(ecc, EDMA_DRAE, 0, i >> 5,
+				       BIT(i & 0x1f));
+
+			edma_setup_interrupt(ecc, EDMA_CTLR_CHAN(ecc->id, i),
+					     ecc->intr_data[i].callback,
+					     ecc->intr_data[i].data);
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops edma_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume)
+};
+
 static struct platform_driver edma_driver = {
 	.probe		= edma_probe,
 	.remove		= edma_remove,
 	.driver = {
-		.name = "edma-dma-engine",
+		.name	= "edma",
+		.pm	= &edma_pm_ops,
+		.of_match_table = edma_of_ids,
 	},
 };
 
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 466021c03169..6b9d500956e4 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -41,37 +41,6 @@
 #ifndef EDMA_H_
 #define EDMA_H_
 
-/* PaRAM slots are laid out like this */
-struct edmacc_param {
-	u32 opt;
-	u32 src;
-	u32 a_b_cnt;
-	u32 dst;
-	u32 src_dst_bidx;
-	u32 link_bcntrld;
-	u32 src_dst_cidx;
-	u32 ccnt;
-} __packed;
-
-/* fields in edmacc_param.opt */
-#define SAM		BIT(0)
-#define DAM		BIT(1)
-#define SYNCDIM		BIT(2)
-#define STATIC		BIT(3)
-#define EDMA_FWID	(0x07 << 8)
-#define TCCMODE		BIT(11)
-#define EDMA_TCC(t)	((t) << 12)
-#define TCINTEN		BIT(20)
-#define ITCINTEN	BIT(21)
-#define TCCHEN		BIT(22)
-#define ITCCHEN		BIT(23)
-
-/*ch_status paramater of callback function possible values*/
-#define EDMA_DMA_COMPLETE 1
-#define EDMA_DMA_CC_ERROR 2
-#define EDMA_DMA_TC1_ERROR 3
-#define EDMA_DMA_TC2_ERROR 4
-
 enum dma_event_q {
 	EVENTQ_0 = 0,
 	EVENTQ_1 = 1,
@@ -84,49 +53,6 @@ enum dma_event_q {
 #define EDMA_CTLR(i)			((i) >> 16)
 #define EDMA_CHAN_SLOT(i)		((i) & 0xffff)
 
-#define EDMA_CHANNEL_ANY		-1	/* for edma_alloc_channel() */
-#define EDMA_SLOT_ANY			-1	/* for edma_alloc_slot() */
-#define EDMA_CONT_PARAMS_ANY		 1001
-#define EDMA_CONT_PARAMS_FIXED_EXACT	 1002
-#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003
-
-#define EDMA_MAX_CC               2
-
-struct edma;
-
-struct edma *edma_get_data(struct device *edma_dev);
-
-/* alloc/free DMA channels and their dedicated parameter RAM slots */
-int edma_alloc_channel(struct edma *cc, int channel,
-	void (*callback)(unsigned channel, u16 ch_status, void *data),
-	void *data, enum dma_event_q);
-void edma_free_channel(struct edma *cc, unsigned channel);
-
-/* alloc/free parameter RAM slots */
-int edma_alloc_slot(struct edma *cc, int slot);
-void edma_free_slot(struct edma *cc, unsigned slot);
-
-/* calls that operate on part of a parameter RAM slot */
-dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst);
-void edma_link(struct edma *cc, unsigned from, unsigned to);
-
-/* calls that operate on an entire parameter RAM slot */
-void edma_write_slot(struct edma *cc, unsigned slot,
-		     const struct edmacc_param *params);
-void edma_read_slot(struct edma *cc, unsigned slot,
-		    struct edmacc_param *params);
-
-/* channel control operations */
-int edma_start(struct edma *cc, unsigned channel);
-void edma_stop(struct edma *cc, unsigned channel);
-void edma_clean_channel(struct edma *cc, unsigned channel);
-void edma_pause(struct edma *cc, unsigned channel);
-void edma_resume(struct edma *cc, unsigned channel);
-int edma_trigger_channel(struct edma *cc, unsigned channel);
-
-void edma_assign_channel_eventq(struct edma *cc, unsigned channel,
-				enum dma_event_q eventq_no);
-
 struct edma_rsv_info {
 
 	const s16	(*rsv_chans)[2];
-- 
cgit v1.2.3


From 1a7caca20ed56a80cea045327deaeb4e4379cbd1 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Date: Fri, 28 Aug 2015 10:39:20 -0700
Subject: soc: qcom: smd: Implement id_table driver matching

Implement an id_table based driver matching mechanism for drivers that
bind to fixed channels and don't need any additional configuration,
e.g. IPCRTR and DIAG.

Signed-off-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Signed-off-by: Andy Gross <agross@codeaurora.org>
---
 drivers/soc/qcom/smd.c       | 25 ++++++++++++++++++-------
 include/linux/soc/qcom/smd.h | 11 +++++++++++
 2 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c
index a6155c917d52..d883c16d1775 100644
--- a/drivers/soc/qcom/smd.c
+++ b/drivers/soc/qcom/smd.c
@@ -727,6 +727,19 @@ static struct qcom_smd_driver *to_smd_driver(struct device *dev)
 
 static int qcom_smd_dev_match(struct device *dev, struct device_driver *drv)
 {
+	struct qcom_smd_device *qsdev = to_smd_device(dev);
+	struct qcom_smd_driver *qsdrv = container_of(drv, struct qcom_smd_driver, driver);
+	const struct qcom_smd_id *match = qsdrv->smd_match_table;
+	const char *name = qsdev->channel->name;
+	/* Try the driver's static table first; an empty name ends the table */
+	if (match) {
+		while (match->name[0]) {
+			if (!strcmp(match->name, name))
+				return 1;
+			match++;
+		}
+	}
+	/* No table hit: fall back to device tree based matching */
 	return of_driver_match_device(dev, drv);
 }
 
@@ -880,19 +893,17 @@ static int qcom_smd_create_device(struct qcom_smd_channel *channel)
 	if (channel->qsdev)
 		return -EEXIST;
 
-	node = qcom_smd_match_channel(edge->of_node, channel->name);
-	if (!node) {
-		dev_dbg(smd->dev, "no match for '%s'\n", channel->name);
-		return -ENXIO;
-	}
-
 	dev_dbg(smd->dev, "registering '%s'\n", channel->name);
 
 	qsdev = kzalloc(sizeof(*qsdev), GFP_KERNEL);
 	if (!qsdev)
 		return -ENOMEM;
 
-	dev_set_name(&qsdev->dev, "%s.%s", edge->of_node->name, node->name);
+	node = qcom_smd_match_channel(edge->of_node, channel->name);
+	dev_set_name(&qsdev->dev, "%s.%s",
+		     edge->of_node->name,
+		     node ? node->name : channel->name);
+
 	qsdev->dev.parent = smd->dev;
 	qsdev->dev.bus = &qcom_smd_bus;
 	qsdev->dev.release = qcom_smd_release_device;
diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h
index d7e50aa6a4ac..d0cb6d189a0a 100644
--- a/include/linux/soc/qcom/smd.h
+++ b/include/linux/soc/qcom/smd.h
@@ -8,6 +8,14 @@ struct qcom_smd;
 struct qcom_smd_channel;
 struct qcom_smd_lookup;
 
+/**
+ * struct qcom_smd_id - struct used for matching a smd device
+ * @name:	name of the channel; an empty name ends a match table
+ */
+struct qcom_smd_id {
+	char name[20];
+};
+
 /**
  * struct qcom_smd_device - smd device struct
  * @dev:	the device struct
@@ -21,6 +29,7 @@ struct qcom_smd_device {
 /**
  * struct qcom_smd_driver - smd driver struct
  * @driver:	underlying device driver
+ * @smd_match_table: static channel match table
  * @probe:	invoked when the smd channel is found
  * @remove:	invoked when the smd channel is closed
  * @callback:	invoked when an inbound message is received on the channel,
@@ -29,6 +38,8 @@ struct qcom_smd_device {
  */
 struct qcom_smd_driver {
 	struct device_driver driver;
+	const struct qcom_smd_id *smd_match_table;
+
 	int (*probe)(struct qcom_smd_device *dev);
 	void (*remove)(struct qcom_smd_device *dev);
 	int (*callback)(struct qcom_smd_device *, const void *, size_t);
-- 
cgit v1.2.3


From 1a03964dec3cecb6382d172b9dfe318735c2cad7 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 2 Sep 2015 15:46:44 -0700
Subject: soc: qcom: Make qcom_smem_get() return a pointer

Passing a void ** almost always requires a cast at the call site.
Instead of littering the code with casts every time this function
is called, have qcom_smem_get() return a void pointer to the
location of the smem item. This frees the caller from having to
cast the pointer.

Cc: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Signed-off-by: Andy Gross <agross@codeaurora.org>
---
 drivers/soc/qcom/smd.c        | 30 +++++++++---------
 drivers/soc/qcom/smem.c       | 72 +++++++++++++++++++------------------------
 include/linux/soc/qcom/smem.h |  2 +-
 3 files changed, 48 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c
index d883c16d1775..beaea1d5169f 100644
--- a/drivers/soc/qcom/smd.c
+++ b/drivers/soc/qcom/smd.c
@@ -989,10 +989,11 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 	spin_lock_init(&channel->recv_lock);
 	init_waitqueue_head(&channel->fblockread_event);
 
-	ret = qcom_smem_get(edge->remote_pid, smem_info_item, (void **)&info,
-			    &info_size);
-	if (ret)
+	info = qcom_smem_get(edge->remote_pid, smem_info_item, &info_size);
+	if (IS_ERR(info)) {
+		ret = PTR_ERR(info);
 		goto free_name_and_channel;
+	}
 
 	/*
 	 * Use the size of the item to figure out which channel info struct to
@@ -1011,10 +1012,11 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 		goto free_name_and_channel;
 	}
 
-	ret = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_base,
-			    &fifo_size);
-	if (ret)
+	fifo_base = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_size);
+	if (IS_ERR(fifo_base)) {
+		ret =  PTR_ERR(fifo_base);
 		goto free_name_and_channel;
+	}
 
 	/* The channel consist of a rx and tx fifo of equal size */
 	fifo_size /= 2;
@@ -1051,16 +1053,13 @@ static void qcom_discover_channels(struct qcom_smd_edge *edge)
 	unsigned long flags;
 	unsigned fifo_id;
 	unsigned info_id;
-	int ret;
 	int tbl;
 	int i;
 
 	for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) {
-		ret = qcom_smem_get(edge->remote_pid,
-				    smem_items[tbl].alloc_tbl_id,
-				    (void **)&alloc_tbl,
-				    NULL);
-		if (ret < 0)
+		alloc_tbl = qcom_smem_get(edge->remote_pid,
+				    smem_items[tbl].alloc_tbl_id, NULL);
+		if (IS_ERR(alloc_tbl))
 			continue;
 
 		for (i = 0; i < SMD_ALLOC_TBL_SIZE; i++) {
@@ -1238,11 +1237,12 @@ static int qcom_smd_probe(struct platform_device *pdev)
 	int num_edges;
 	int ret;
 	int i = 0;
+	void *p;
 
 	/* Wait for smem */
-	ret = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL, NULL);
-	if (ret == -EPROBE_DEFER)
-		return ret;
+	p = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL);
+	if (PTR_ERR(p) == -EPROBE_DEFER)
+		return PTR_ERR(p);
 
 	num_edges = of_get_available_child_count(pdev->dev.of_node);
 	array_size = sizeof(*smd) + num_edges * sizeof(struct qcom_smd_edge);
diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c
index f402a606eb7d..e6d0dae63845 100644
--- a/drivers/soc/qcom/smem.c
+++ b/drivers/soc/qcom/smem.c
@@ -378,10 +378,9 @@ int qcom_smem_alloc(unsigned host, unsigned item, size_t size)
 }
 EXPORT_SYMBOL(qcom_smem_alloc);
 
-static int qcom_smem_get_global(struct qcom_smem *smem,
-				unsigned item,
-				void **ptr,
-				size_t *size)
+static void *qcom_smem_get_global(struct qcom_smem *smem,
+				  unsigned item,
+				  size_t *size)
 {
 	struct smem_header *header;
 	struct smem_region *area;
@@ -390,36 +389,32 @@ static int qcom_smem_get_global(struct qcom_smem *smem,
 	unsigned i;
 
 	if (WARN_ON(item >= SMEM_ITEM_COUNT))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	header = smem->regions[0].virt_base;
 	entry = &header->toc[item];
 	if (!entry->allocated)
-		return -ENXIO;
+		return ERR_PTR(-ENXIO);
 
-	if (ptr != NULL) {
-		aux_base = entry->aux_base & AUX_BASE_MASK;
+	aux_base = entry->aux_base & AUX_BASE_MASK;
 
-		for (i = 0; i < smem->num_regions; i++) {
-			area = &smem->regions[i];
+	for (i = 0; i < smem->num_regions; i++) {
+		area = &smem->regions[i];
 
-			if (area->aux_base == aux_base || !aux_base) {
-				*ptr = area->virt_base + entry->offset;
-				break;
-			}
+		if (area->aux_base == aux_base || !aux_base) {
+			if (size != NULL)
+				*size = entry->size;
+			return area->virt_base + entry->offset;
 		}
 	}
-	if (size != NULL)
-		*size = entry->size;
 
-	return 0;
+	return ERR_PTR(-ENOENT);
 }
 
-static int qcom_smem_get_private(struct qcom_smem *smem,
-				 unsigned host,
-				 unsigned item,
-				 void **ptr,
-				 size_t *size)
+static void *qcom_smem_get_private(struct qcom_smem *smem,
+				   unsigned host,
+				   unsigned item,
+				   size_t *size)
 {
 	struct smem_partition_header *phdr;
 	struct smem_private_entry *hdr;
@@ -435,55 +430,54 @@ static int qcom_smem_get_private(struct qcom_smem *smem,
 			dev_err(smem->dev,
 				"Found invalid canary in host %d partition\n",
 				host);
-			return -EINVAL;
+			return ERR_PTR(-EINVAL);
 		}
 
 		if (hdr->item == item) {
-			if (ptr != NULL)
-				*ptr = p + sizeof(*hdr) + hdr->padding_hdr;
-
 			if (size != NULL)
 				*size = hdr->size - hdr->padding_data;
 
-			return 0;
+			return p + sizeof(*hdr) + hdr->padding_hdr;
 		}
 
 		p += sizeof(*hdr) + hdr->padding_hdr + hdr->size;
 	}
 
-	return -ENOENT;
+	return ERR_PTR(-ENOENT);
 }
 
 /**
  * qcom_smem_get() - resolve ptr of size of a smem item
  * @host:	the remote processor, or -1
  * @item:	smem item handle
- * @ptr:	pointer to be filled out with address of the item
  * @size:	pointer to be filled out with size of the item
  *
- * Looks up pointer and size of a smem item.
+ * Looks up smem item and returns pointer to it. Size of smem
+ * item is returned in @size.
  */
-int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size)
+void *qcom_smem_get(unsigned host, unsigned item, size_t *size)
 {
 	unsigned long flags;
 	int ret;
+	void *ptr = ERR_PTR(-EPROBE_DEFER);
 
 	if (!__smem)
-		return -EPROBE_DEFER;
+		return ptr;
 
 	ret = hwspin_lock_timeout_irqsave(__smem->hwlock,
 					  HWSPINLOCK_TIMEOUT,
 					  &flags);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	if (host < SMEM_HOST_COUNT && __smem->partitions[host])
-		ret = qcom_smem_get_private(__smem, host, item, ptr, size);
+		ptr = qcom_smem_get_private(__smem, host, item, size);
 	else
-		ret = qcom_smem_get_global(__smem, item, ptr, size);
+		ptr = qcom_smem_get_global(__smem, item, size);
 
 	hwspin_unlock_irqrestore(__smem->hwlock, &flags);
-	return ret;
+
+	return ptr;
 
 }
 EXPORT_SYMBOL(qcom_smem_get);
@@ -520,11 +514,9 @@ static int qcom_smem_get_sbl_version(struct qcom_smem *smem)
 {
 	unsigned *versions;
 	size_t size;
-	int ret;
 
-	ret = qcom_smem_get_global(smem, SMEM_ITEM_VERSION,
-				   (void **)&versions, &size);
-	if (ret < 0) {
+	versions = qcom_smem_get_global(smem, SMEM_ITEM_VERSION, &size);
+	if (IS_ERR(versions)) {
 		dev_err(smem->dev, "Unable to read the version item\n");
 		return -ENOENT;
 	}
diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h
index bc9630d3aced..785e196ee2ca 100644
--- a/include/linux/soc/qcom/smem.h
+++ b/include/linux/soc/qcom/smem.h
@@ -4,7 +4,7 @@
 #define QCOM_SMEM_HOST_ANY -1
 
 int qcom_smem_alloc(unsigned host, unsigned item, size_t size);
-int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size);
+void *qcom_smem_get(unsigned host, unsigned item, size_t *size);
 
 int qcom_smem_get_free_space(unsigned host);
 
-- 
cgit v1.2.3


From 2d3c277ca5b1a9c12cde1f760ff925b87608bc76 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Tue, 29 Sep 2015 15:48:55 -0400
Subject: qcom-scm: add missing prototype for qcom_scm_is_available()

Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Andy Gross <agross@codeaurora.org>
---
 include/linux/qcom_scm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 6e7d5ec65838..9e12000914b3 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -23,6 +23,8 @@ struct qcom_scm_hdcp_req {
 	u32 val;
 };
 
+extern bool qcom_scm_is_available(void);
+
 extern bool qcom_scm_hdcp_available(void);
 extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
 		u32 *resp);
-- 
cgit v1.2.3


From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Sat, 26 Sep 2015 02:24:34 +0800
Subject: nfs42: add CLONE xdr functions

xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42xdr.c       | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/nfs/nfs4xdr.c        |  1 +
 include/linux/nfs4.h    |  2 +
 include/linux/nfs_xdr.h | 19 ++++++++++
 4 files changed, 118 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0eb29e14070d..0ca482a51e53 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -34,6 +34,12 @@
 					1 /* opaque devaddr4 length */ + \
 					XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
 #define decode_layoutstats_maxsz	(op_decode_hdr_maxsz)
+#define encode_clone_maxsz		(encode_stateid_maxsz + \
+					encode_stateid_maxsz + \
+					2 /* src offset */ + \
+					2 /* dst offset */ + \
+					2 /* count */)
+#define decode_clone_maxsz		(op_decode_hdr_maxsz)
 
 #define NFS4_enc_allocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
@@ -65,7 +71,20 @@
 					 decode_sequence_maxsz + \
 					 decode_putfh_maxsz + \
 					 PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
-
+#define NFS4_enc_clone_sz		(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_savefh_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_clone_maxsz + \
+					 encode_getattr_maxsz)
+#define NFS4_dec_clone_sz		(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_savefh_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_clone_maxsz + \
+					 decode_getattr_maxsz)
 
 static void encode_fallocate(struct xdr_stream *xdr,
 			     struct nfs42_falloc_args *args)
@@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr,
 		encode_uint32(xdr, 0);
 }
 
+static void encode_clone(struct xdr_stream *xdr,
+			 struct nfs42_clone_args *args,
+			 struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr);
+	encode_nfs4_stateid(xdr, &args->src_stateid);
+	encode_nfs4_stateid(xdr, &args->dst_stateid);
+	p = reserve_space(xdr, 3*8);
+	p = xdr_encode_hyper(p, args->src_offset);
+	p = xdr_encode_hyper(p, args->dst_offset);
+	xdr_encode_hyper(p, args->count);
+}
+
 /*
  * Encode ALLOCATE request
  */
@@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode CLONE request
+ */
+static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
+			       struct xdr_stream *xdr,
+			       struct nfs42_clone_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->src_fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dst_fh, &hdr);
+	encode_clone(xdr, args, &hdr);
+	encode_getfattr(xdr, args->dst_bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
 static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
 	return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_LAYOUTSTATS);
 }
 
+static int decode_clone(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_CLONE);
+}
+
 /*
  * Decode ALLOCATE request
  */
@@ -351,4 +411,39 @@ out:
 	return status;
 }
 
+/*
+ * Decode CLONE request
+ */
+static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
+			      struct xdr_stream *xdr,
+			      struct nfs42_clone_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_savefh(xdr);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_clone(xdr);
+	if (status)
+		goto out;
+	status = decode_getfattr(xdr, res->dst_fattr, res->server);
+
+out:
+	res->rpc_status = status;
+	return status;
+}
+
 #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 788adf3897c7..868472b6b303 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7465,6 +7465,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(ALLOCATE,		enc_allocate,		dec_allocate),
 	PROC(DEALLOCATE,	enc_deallocate,		dec_deallocate),
 	PROC(LAYOUTSTATS,	enc_layoutstats,	dec_layoutstats),
+	PROC(CLONE,		enc_clone,		dec_clone),
 #endif /* CONFIG_NFS_V4_2 */
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 00121f298269..c0c695b634d0 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -130,6 +130,7 @@ enum nfs_opnum4 {
 	OP_READ_PLUS = 68,
 	OP_SEEK = 69,
 	OP_WRITE_SAME = 70,
+	OP_CLONE = 71,
 
 	OP_ILLEGAL = 10044,
 };
@@ -501,6 +502,7 @@ enum {
 	NFSPROC4_CLNT_ALLOCATE,
 	NFSPROC4_CLNT_DEALLOCATE,
 	NFSPROC4_CLNT_LAYOUTSTATS,
+	NFSPROC4_CLNT_CLONE,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 52faf7e96c65..ac678b7a65ed 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -359,6 +359,25 @@ struct nfs42_layoutstat_data {
 	struct nfs42_layoutstat_res res;
 };
 
+struct nfs42_clone_args {
+	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh			*src_fh;
+	struct nfs_fh			*dst_fh;
+	nfs4_stateid			src_stateid;
+	nfs4_stateid			dst_stateid;
+	__u64				src_offset;
+	__u64				dst_offset;
+	__u64				count;
+	const u32			*dst_bitmask;
+};
+
+struct nfs42_clone_res {
+	struct nfs4_sequence_res	seq_res;
+	unsigned int			rpc_status;
+	struct nfs_fattr		*dst_fattr;
+	const struct nfs_server		*server;
+};
+
 struct stateowner_id {
 	__u64	create_time;
 	__u32	uniquifier;
-- 
cgit v1.2.3


From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Sat, 26 Sep 2015 02:24:35 +0800
Subject: nfs42: add CLONE proc functions

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42.h            |  1 +
 fs/nfs/nfs42proc.c        | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         |  3 +-
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 75 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 814c1255f1d2..b587ccd31083 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
 loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 int nfs42_proc_layoutstats_generic(struct nfs_server *,
 				   struct nfs42_layoutstat_data *);
+int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
 
 #endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 0f020e4d8421..3e92a3cde15d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
 		return PTR_ERR(task);
 	return 0;
 }
+
+static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
+			     struct file *dst_f, loff_t src_offset,
+			     loff_t dst_offset, loff_t count)
+{
+	struct inode *src_inode = file_inode(src_f);
+	struct inode *dst_inode = file_inode(dst_f);
+	struct nfs_server *server = NFS_SERVER(dst_inode);
+	struct nfs42_clone_args args = {
+		.src_fh = NFS_FH(src_inode),
+		.dst_fh = NFS_FH(dst_inode),
+		.src_offset = src_offset,
+		.dst_offset = dst_offset,
+		.dst_bitmask = server->cache_consistency_bitmask,
+	};
+	struct nfs42_clone_res res = {
+		.server	= server,
+	};
+	int status;
+
+	msg->rpc_argp = &args;
+	msg->rpc_resp = &res;
+
+	status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+	if (status)
+		return status;
+
+	status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+	if (status)
+		return status;
+
+	res.dst_fattr = nfs_alloc_fattr();
+	if (!res.dst_fattr)
+		return -ENOMEM;
+
+	status = nfs4_call_sync(server->client, server, msg,
+				&args.seq_args, &res.seq_res, 0);
+	if (status == 0)
+		status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+
+	kfree(res.dst_fattr);
+	return status;
+}
+
+int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
+		     loff_t src_offset, loff_t dst_offset, loff_t count)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE],
+	};
+	struct inode *inode = file_inode(src_f);
+	struct nfs_server *server = NFS_SERVER(file_inode(src_f));
+	struct nfs4_exception exception = { };
+	int err;
+
+	if (!nfs_server_capable(inode, NFS_CAP_CLONE))
+		return -EOPNOTSUPP;
+
+	do {
+		err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
+					dst_offset, count);
+		if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
+			NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
+			return -EOPNOTSUPP;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+
+	return err;
+
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5133bb18830e..9688b1a9787f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8729,7 +8729,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_ALLOCATE
 		| NFS_CAP_DEALLOCATE
 		| NFS_CAP_SEEK
-		| NFS_CAP_LAYOUTSTATS,
+		| NFS_CAP_LAYOUTSTATS
+		| NFS_CAP_CLONE,
 	.init_client = nfs41_init_client,
 	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 570a7df2775b..a50de1002b20 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -243,5 +243,6 @@ struct nfs_server {
 #define NFS_CAP_ALLOCATE	(1U << 20)
 #define NFS_CAP_DEALLOCATE	(1U << 21)
 #define NFS_CAP_LAYOUTSTATS	(1U << 22)
+#define NFS_CAP_CLONE		(1U << 23)
 
 #endif
-- 
cgit v1.2.3


From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001
From: Peng Tao <tao.peng@primarydata.com>
Date: Sat, 26 Sep 2015 02:24:37 +0800
Subject: nfs: get clone_blksize when probing fsinfo

The NFSv4.2 CLONE operation is supposed to respect it.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/client.c           |  1 +
 fs/nfs/nfs4proc.c         |  1 +
 fs/nfs/nfs4xdr.c          | 25 +++++++++++++++++++++++++
 include/linux/nfs4.h      |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 6 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 57c5a02f6213..d6d5d2a48e83 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
 
 	server->time_delta = fsinfo->time_delta;
 
+	server->clone_blksize = fsinfo->clone_blksize;
 	/* We're airborne Set socket buffersize */
 	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9688b1a9787f..8814fbe62623 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -239,6 +239,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
 			FATTR4_WORD1_TIME_DELTA
 			| FATTR4_WORD1_FS_LAYOUT_TYPES,
 			FATTR4_WORD2_LAYOUT_BLKSIZE
+			| FATTR4_WORD2_CLONE_BLKSIZE
 };
 
 const u32 nfs4_fs_locations_bitmap[3] = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 868472b6b303..9f656791a338 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4764,6 +4764,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
 	return 0;
 }
 
+/*
+ * The granularity of a CLONE operation.
+ */
+static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
+				     uint32_t *res)
+{
+	__be32 *p;
+
+	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+	*res = 0;
+	if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p)) {
+			print_overflow_msg(__func__, xdr);
+			return -EIO;
+		}
+		*res = be32_to_cpup(p);
+		bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE;
+	}
+	return 0;
+}
+
 static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 {
 	unsigned int savep;
@@ -4796,6 +4818,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 	if (status != 0)
 		goto xdr_error;
 	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
+	if (status)
+		goto xdr_error;
+	status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize);
 	if (status)
 		goto xdr_error;
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c0c695b634d0..e7e78537aea2 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -422,6 +422,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_TYPES       (1UL << 0)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
+#define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 
 /* MDS threshold bitmap bits */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a50de1002b20..2469ab0bb3a1 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -147,6 +147,7 @@ struct nfs_server {
 	unsigned int		acdirmax;
 	unsigned int		namelen;
 	unsigned int		options;	/* extra options enabled by mount */
+	unsigned int		clone_blksize;	/* granularity of a CLONE operation */
 #define NFS_OPTION_FSCACHE	0x00000001	/* - local caching enabled */
 #define NFS_OPTION_MIGRATION	0x00000002	/* - NFSv4 migration enabled */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ac678b7a65ed..92ff445e60a0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -141,6 +141,7 @@ struct nfs_fsinfo {
 	__u32			lease_time; /* in seconds */
 	__u32			layouttype; /* supported pnfs layout driver */
 	__u32			blksize; /* preferred pnfs io block size */
+	__u32			clone_blksize; /* granularity of a CLONE operation */
 };
 
 struct nfs_fsstat {
-- 
cgit v1.2.3


From 203d027de4d7068c607b60d4310a1599dec8839f Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 28 Aug 2015 11:56:26 +0200
Subject: vga_switcheroo: Use enum vga_switcheroo_state instead of int

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/vga/vga_switcheroo.c | 6 +++---
 include/linux/vga_switcheroo.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 1acbe20143d4..a7870d23c5ab 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -100,7 +100,7 @@
 struct vga_switcheroo_client {
 	struct pci_dev *pdev;
 	struct fb_info *fb_info;
-	int pwr_state;
+	enum vga_switcheroo_state pwr_state;
 	const struct vga_switcheroo_client_ops *ops;
 	int id;
 	bool active;
@@ -344,7 +344,7 @@ find_active_client(struct list_head *head)
  *
  * Return: Power state.
  */
-int vga_switcheroo_get_client_state(struct pci_dev *pdev)
+enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *pdev)
 {
 	struct vga_switcheroo_client *client;
 	enum vga_switcheroo_state ret;
@@ -496,7 +496,7 @@ static int vga_switchoff(struct vga_switcheroo_client *client)
 	return 0;
 }
 
-static void set_audio_state(int id, int state)
+static void set_audio_state(int id, enum vga_switcheroo_state state)
 {
 	struct vga_switcheroo_client *client;
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 376499197717..e63661757505 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -138,7 +138,7 @@ void vga_switcheroo_unregister_handler(void);
 
 int vga_switcheroo_process_delayed_switch(void);
 
-int vga_switcheroo_get_client_state(struct pci_dev *dev);
+enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev);
 
 void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic);
 
@@ -157,7 +157,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
-static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
+static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
 
 static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {}
 
-- 
cgit v1.2.3


From 21c5ba8c1ee02f204e556c26703cebaf9c4019e0 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 28 Aug 2015 13:30:32 +0200
Subject: vga_switcheroo: Use VGA_SWITCHEROO_UNKNOWN_ID instead of -1

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/vga/vga_switcheroo.c | 17 +++++++++--------
 include/linux/vga_switcheroo.h   |  4 ++++
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index a7870d23c5ab..989630528529 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -84,9 +84,9 @@
  * @fb_info: framebuffer to which console is remapped on switching
  * @pwr_state: current power state
  * @ops: client callbacks
- * @id: client identifier, see enum vga_switcheroo_client_id.
- * 	Determining the id requires the handler, so GPUs are initially
- * 	assigned -1 and later given their true id in vga_switcheroo_enable()
+ * @id: client identifier. Determining the id requires the handler,
+ * 	so gpus are initially assigned VGA_SWITCHEROO_UNKNOWN_ID
+ * 	and later given their true id in vga_switcheroo_enable()
  * @active: whether the outputs are currently switched to this client
  * @driver_power_control: whether power state is controlled by the driver's
  * 	runtime pm. If true, writing ON and OFF to the vga_switcheroo debugfs
@@ -145,7 +145,8 @@ struct vgasr_priv {
 
 #define ID_BIT_AUDIO		0x100
 #define client_is_audio(c)	((c)->id & ID_BIT_AUDIO)
-#define client_is_vga(c)	((c)->id == -1 || !client_is_audio(c))
+#define client_is_vga(c)	((c)->id == VGA_SWITCHEROO_UNKNOWN_ID || \
+				 !client_is_audio(c))
 #define client_id(c)		((c)->id & ~ID_BIT_AUDIO)
 
 static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
@@ -173,7 +174,7 @@ static void vga_switcheroo_enable(void)
 		vgasr_priv.handler->init();
 
 	list_for_each_entry(client, &vgasr_priv.clients, list) {
-		if (client->id != -1)
+		if (client->id != VGA_SWITCHEROO_UNKNOWN_ID)
 			continue;
 		ret = vgasr_priv.handler->get_client_id(client->pdev);
 		if (ret < 0)
@@ -277,7 +278,7 @@ int vga_switcheroo_register_client(struct pci_dev *pdev,
 				   const struct vga_switcheroo_client_ops *ops,
 				   bool driver_power_control)
 {
-	return register_client(pdev, ops, -1,
+	return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID,
 			       pdev == vga_default_device(),
 			       driver_power_control);
 }
@@ -583,7 +584,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
 	int ret;
 	bool delay = false, can_switch;
 	bool just_mux = false;
-	int client_id = -1;
+	int client_id = VGA_SWITCHEROO_UNKNOWN_ID;
 	struct vga_switcheroo_client *client = NULL;
 
 	if (cnt > 63)
@@ -652,7 +653,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
 		client_id = VGA_SWITCHEROO_DIS;
 	}
 
-	if (client_id == -1)
+	if (client_id == VGA_SWITCHEROO_UNKNOWN_ID)
 		goto out;
 	client = find_client_from_id(&vgasr_priv.clients, client_id);
 	if (!client)
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index e63661757505..88909a865b72 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -59,6 +59,9 @@ enum vga_switcheroo_state {
 
 /**
  * enum vga_switcheroo_client_id - client identifier
+ * @VGA_SWITCHEROO_UNKNOWN_ID: initial identifier assigned to vga clients.
+ * 	Determining the id requires the handler, so GPUs are given their
+ * 	true id in a delayed fashion in vga_switcheroo_enable()
  * @VGA_SWITCHEROO_IGD: integrated graphics device
  * @VGA_SWITCHEROO_DIS: discrete graphics device
  * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported
@@ -66,6 +69,7 @@ enum vga_switcheroo_state {
  * Client identifier. Audio clients use the same identifier & 0x100.
  */
 enum vga_switcheroo_client_id {
+	VGA_SWITCHEROO_UNKNOWN_ID = -1,
 	VGA_SWITCHEROO_IGD,
 	VGA_SWITCHEROO_DIS,
 	VGA_SWITCHEROO_MAX_CLIENTS,
-- 
cgit v1.2.3


From fa3e967fffaf267ccab7959429722da34e45ad77 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 28 Aug 2015 12:54:07 +0200
Subject: vga_switcheroo: Use enum vga_switcheroo_client_id instead of int

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/vga/vga_switcheroo.c | 17 ++++++++++-------
 include/linux/vga_switcheroo.h   |  6 +++---
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 989630528529..af0d372ff7d4 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -102,7 +102,7 @@ struct vga_switcheroo_client {
 	struct fb_info *fb_info;
 	enum vga_switcheroo_state pwr_state;
 	const struct vga_switcheroo_client_ops *ops;
-	int id;
+	enum vga_switcheroo_client_id id;
 	bool active;
 	bool driver_power_control;
 	struct list_head list;
@@ -233,7 +233,8 @@ EXPORT_SYMBOL(vga_switcheroo_unregister_handler);
 
 static int register_client(struct pci_dev *pdev,
 			   const struct vga_switcheroo_client_ops *ops,
-			   int id, bool active, bool driver_power_control)
+			   enum vga_switcheroo_client_id id, bool active,
+			   bool driver_power_control)
 {
 	struct vga_switcheroo_client *client;
 
@@ -288,7 +289,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  * vga_switcheroo_register_audio_client - register audio client
  * @pdev: client pci device
  * @ops: client callbacks
- * @id: client identifier, see enum vga_switcheroo_client_id
+ * @id: client identifier
  *
  * Register audio client (audio device on a GPU). The power state of the
  * client is assumed to be ON.
@@ -297,7 +298,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  */
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 int id)
+					 enum vga_switcheroo_client_id id)
 {
 	return register_client(pdev, ops, id | ID_BIT_AUDIO, false, false);
 }
@@ -315,7 +316,8 @@ find_client_from_pci(struct list_head *head, struct pci_dev *pdev)
 }
 
 static struct vga_switcheroo_client *
-find_client_from_id(struct list_head *head, int client_id)
+find_client_from_id(struct list_head *head,
+		    enum vga_switcheroo_client_id client_id)
 {
 	struct vga_switcheroo_client *client;
 
@@ -497,7 +499,8 @@ static int vga_switchoff(struct vga_switcheroo_client *client)
 	return 0;
 }
 
-static void set_audio_state(int id, enum vga_switcheroo_state state)
+static void set_audio_state(enum vga_switcheroo_client_id id,
+			    enum vga_switcheroo_state state)
 {
 	struct vga_switcheroo_client *client;
 
@@ -584,7 +587,7 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
 	int ret;
 	bool delay = false, can_switch;
 	bool just_mux = false;
-	int client_id = VGA_SWITCHEROO_UNKNOWN_ID;
+	enum vga_switcheroo_client_id client_id = VGA_SWITCHEROO_UNKNOWN_ID;
 	struct vga_switcheroo_client *client = NULL;
 
 	if (cnt > 63)
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 88909a865b72..c55751155631 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -100,7 +100,7 @@ struct vga_switcheroo_handler {
 	int (*switchto)(enum vga_switcheroo_client_id id);
 	int (*power_state)(enum vga_switcheroo_client_id id,
 			   enum vga_switcheroo_state state);
-	int (*get_client_id)(struct pci_dev *pdev);
+	enum vga_switcheroo_client_id (*get_client_id)(struct pci_dev *pdev);
 };
 
 /**
@@ -132,7 +132,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev,
 				   bool driver_power_control);
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 int id);
+					 enum vga_switcheroo_client_id id);
 
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
 				  struct fb_info *info);
@@ -158,7 +158,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i
 static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	const struct vga_switcheroo_client_ops *ops,
-	int id) { return 0; }
+	enum vga_switcheroo_client_id id) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
 static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
-- 
cgit v1.2.3


From b299167652fe58f1ebadb3e3ac84a5a0b74e534e Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Wed, 7 Oct 2015 02:45:11 +0300
Subject: i2c: ocores: support big-endian register layout

This allows using an OpenCores I2C controller attached to its host in
native-endian mode with bi-endian CPUs. An example of such a system is the
Xtensa XTFPGA platform.

Acked-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-ocores.c | 31 +++++++++++++++++++++++++++----
 include/linux/i2c-ocores.h      |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index abf5db7e441e..11b7b87311ed 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -92,6 +92,16 @@ static void oc_setreg_32(struct ocores_i2c *i2c, int reg, u8 value)
 	iowrite32(value, i2c->base + (reg << i2c->reg_shift));
 }
 
+static void oc_setreg_16be(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	iowrite16be(value, i2c->base + (reg << i2c->reg_shift));
+}
+
+static void oc_setreg_32be(struct ocores_i2c *i2c, int reg, u8 value)
+{
+	iowrite32be(value, i2c->base + (reg << i2c->reg_shift));
+}
+
 static inline u8 oc_getreg_8(struct ocores_i2c *i2c, int reg)
 {
 	return ioread8(i2c->base + (reg << i2c->reg_shift));
@@ -107,6 +117,16 @@ static inline u8 oc_getreg_32(struct ocores_i2c *i2c, int reg)
 	return ioread32(i2c->base + (reg << i2c->reg_shift));
 }
 
+static inline u8 oc_getreg_16be(struct ocores_i2c *i2c, int reg)
+{
+	return ioread16be(i2c->base + (reg << i2c->reg_shift));
+}
+
+static inline u8 oc_getreg_32be(struct ocores_i2c *i2c, int reg)
+{
+	return ioread32be(i2c->base + (reg << i2c->reg_shift));
+}
+
 static inline void oc_setreg(struct ocores_i2c *i2c, int reg, u8 value)
 {
 	i2c->setreg(i2c, reg, value);
@@ -428,6 +448,9 @@ static int ocores_i2c_probe(struct platform_device *pdev)
 		i2c->reg_io_width = 1; /* Set to default value */
 
 	if (!i2c->setreg || !i2c->getreg) {
+		bool be = pdata ? pdata->big_endian :
+			of_device_is_big_endian(pdev->dev.of_node);
+
 		switch (i2c->reg_io_width) {
 		case 1:
 			i2c->setreg = oc_setreg_8;
@@ -435,13 +458,13 @@ static int ocores_i2c_probe(struct platform_device *pdev)
 			break;
 
 		case 2:
-			i2c->setreg = oc_setreg_16;
-			i2c->getreg = oc_getreg_16;
+			i2c->setreg = be ? oc_setreg_16be : oc_setreg_16;
+			i2c->getreg = be ? oc_getreg_16be : oc_getreg_16;
 			break;
 
 		case 4:
-			i2c->setreg = oc_setreg_32;
-			i2c->getreg = oc_getreg_32;
+			i2c->setreg = be ? oc_setreg_32be : oc_setreg_32;
+			i2c->getreg = be ? oc_getreg_32be : oc_getreg_32;
 			break;
 
 		default:
diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h
index 1c06b5c7c308..01edd96fe1f7 100644
--- a/include/linux/i2c-ocores.h
+++ b/include/linux/i2c-ocores.h
@@ -15,6 +15,7 @@ struct ocores_i2c_platform_data {
 	u32 reg_shift; /* register offset shift value */
 	u32 reg_io_width; /* register io read/write width */
 	u32 clock_khz; /* input clock in kHz */
+	bool big_endian; /* registers are big endian */
 	u8 num_devices; /* number of devices in the devices list */
 	struct i2c_board_info const *devices; /* devices connected to the bus */
 };
-- 
cgit v1.2.3


From c6f1891323e6a259c0b0f516a3a3e0f6b0ee2c5f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 7 Oct 2015 10:16:31 +0200
Subject: i2c: rcar: Remove obsolete platform data support

Since commit 4baadb9e05c68962 ("ARM: shmobile: r8a7778: remove obsolete
setup code"), Renesas R-Car SoCs are only supported in generic DT-only
ARM multi-platform builds.  The driver doesn't need to use platform data
anymore, hence remove platform data configuration.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
[wsa: removed now unused ret value and cast to proper enum type]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-rcar.c | 21 ++-------------------
 include/linux/i2c/i2c-rcar.h  | 10 ----------
 2 files changed, 2 insertions(+), 29 deletions(-)
 delete mode 100644 include/linux/i2c/i2c-rcar.h

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 1921294afc87..bbf3b2505aaf 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -25,7 +25,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/i2c.h>
-#include <linux/i2c/i2c-rcar.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -582,7 +581,6 @@ MODULE_DEVICE_TABLE(of, rcar_i2c_dt_ids);
 
 static int rcar_i2c_probe(struct platform_device *pdev)
 {
-	struct i2c_rcar_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct rcar_i2c_priv *priv;
 	struct i2c_adapter *adap;
 	struct resource *res;
@@ -601,15 +599,9 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 	}
 
 	bus_speed = 100000; /* default 100 kHz */
-	ret = of_property_read_u32(dev->of_node, "clock-frequency", &bus_speed);
-	if (ret < 0 && pdata && pdata->bus_speed)
-		bus_speed = pdata->bus_speed;
+	of_property_read_u32(dev->of_node, "clock-frequency", &bus_speed);
 
-	if (pdev->dev.of_node)
-		priv->devtype = (long)of_match_device(rcar_i2c_dt_ids,
-						      dev)->data;
-	else
-		priv->devtype = platform_get_device_id(pdev)->driver_data;
+	priv->devtype = (enum rcar_i2c_type)of_match_device(rcar_i2c_dt_ids, dev)->data;
 
 	ret = rcar_i2c_clock_calculate(priv, bus_speed, dev);
 	if (ret < 0)
@@ -667,14 +659,6 @@ static int rcar_i2c_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct platform_device_id rcar_i2c_id_table[] = {
-	{ "i2c-rcar",		I2C_RCAR_GEN1 },
-	{ "i2c-rcar_gen1",	I2C_RCAR_GEN1 },
-	{ "i2c-rcar_gen2",	I2C_RCAR_GEN2 },
-	{},
-};
-MODULE_DEVICE_TABLE(platform, rcar_i2c_id_table);
-
 static struct platform_driver rcar_i2c_driver = {
 	.driver	= {
 		.name	= "i2c-rcar",
@@ -682,7 +666,6 @@ static struct platform_driver rcar_i2c_driver = {
 	},
 	.probe		= rcar_i2c_probe,
 	.remove		= rcar_i2c_remove,
-	.id_table	= rcar_i2c_id_table,
 };
 
 module_platform_driver(rcar_i2c_driver);
diff --git a/include/linux/i2c/i2c-rcar.h b/include/linux/i2c/i2c-rcar.h
deleted file mode 100644
index 496f5c2b23c9..000000000000
--- a/include/linux/i2c/i2c-rcar.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __I2C_R_CAR_H__
-#define __I2C_R_CAR_H__
-
-#include <linux/platform_device.h>
-
-struct i2c_rcar_platform_data {
-	u32 bus_speed;
-};
-
-#endif /* __I2C_R_CAR_H__ */
-- 
cgit v1.2.3


From 5d170139eb10ae12e1bd076245c42b35453d8324 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 18 Oct 2015 13:05:40 +0200
Subject: vga_switcheroo: Constify vga_switcheroo_handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vga_switcheroo_client_ops has always been declared const since its
introduction with 26ec685ff9d9 ("vga_switcheroo: Introduce struct
vga_switcheroo_client_ops").

Do so for vga_switcheroo_handler as well.

 drivers/gpu/drm/amd/amdgpu/amdgpu.ko:
   6 .rodata       00009888
- 19 .data         00001f00
+ 19 .data         00001ee0
 drivers/gpu/drm/nouveau/nouveau.ko:
   6 .rodata       000460b8
  17 .data         00018fe0
 drivers/gpu/drm/radeon/radeon.ko:
-  7 .rodata       00030944
+  7 .rodata       00030964
- 21 .data         0000d6a0
+ 21 .data         0000d678
 drivers/platform/x86/apple-gmux.ko:
-  7 .rodata       00000140
+  7 .rodata       00000160
- 11 .data         000000e0
+ 11 .data         000000b8

Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Christian König <christian.koenig@amd.com>.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_acpi.c           | 2 +-
 drivers/gpu/drm/radeon/radeon_atpx_handler.c     | 2 +-
 drivers/gpu/vga/vga_switcheroo.c                 | 4 ++--
 drivers/platform/x86/apple-gmux.c                | 2 +-
 include/linux/vga_switcheroo.h                   | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 1a6b239baab9..5a8fbadbd27b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -501,7 +501,7 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 		return VGA_SWITCHEROO_DIS;
 }
 
-static struct vga_switcheroo_handler amdgpu_atpx_handler = {
+static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.switchto = amdgpu_atpx_switchto,
 	.power_state = amdgpu_atpx_power_state,
 	.init = amdgpu_atpx_init,
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index df2d9818aba3..8b8332e46f24 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -206,7 +206,7 @@ static int nouveau_dsm_get_client_id(struct pci_dev *pdev)
 	return VGA_SWITCHEROO_DIS;
 }
 
-static struct vga_switcheroo_handler nouveau_dsm_handler = {
+static const struct vga_switcheroo_handler nouveau_dsm_handler = {
 	.switchto = nouveau_dsm_switchto,
 	.power_state = nouveau_dsm_power_state,
 	.get_client_id = nouveau_dsm_get_client_id,
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index a771b9f0bf98..c4b4f298a283 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -499,7 +499,7 @@ static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 		return VGA_SWITCHEROO_DIS;
 }
 
-static struct vga_switcheroo_handler radeon_atpx_handler = {
+static const struct vga_switcheroo_handler radeon_atpx_handler = {
 	.switchto = radeon_atpx_switchto,
 	.power_state = radeon_atpx_power_state,
 	.init = radeon_atpx_init,
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index af0d372ff7d4..56bbbd65ae8a 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -140,7 +140,7 @@ struct vgasr_priv {
 	int registered_clients;
 	struct list_head clients;
 
-	struct vga_switcheroo_handler *handler;
+	const struct vga_switcheroo_handler *handler;
 };
 
 #define ID_BIT_AUDIO		0x100
@@ -195,7 +195,7 @@ static void vga_switcheroo_enable(void)
  *
  * Return: 0 on success, -EINVAL if a handler was already registered.
  */
-int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler)
+int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler)
 {
 	mutex_lock(&vgasr_mutex);
 	if (vgasr_priv.handler) {
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 0dec3f59917a..976efeb3f2ba 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -346,7 +346,7 @@ gmux_active_client(struct apple_gmux_data *gmux_data)
 	return VGA_SWITCHEROO_DIS;
 }
 
-static struct vga_switcheroo_handler gmux_handler = {
+static const struct vga_switcheroo_handler gmux_handler = {
 	.switchto = gmux_switchto,
 	.power_state = gmux_set_power_state,
 	.get_client_id = gmux_get_client_id,
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index c55751155631..786bc931dbd1 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -137,7 +137,7 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
 				  struct fb_info *info);
 
-int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler);
+int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler);
 void vga_switcheroo_unregister_handler(void);
 
 int vga_switcheroo_process_delayed_switch(void);
@@ -155,7 +155,7 @@ static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
 static inline int vga_switcheroo_register_client(struct pci_dev *dev,
 		const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; }
 static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {}
-static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
+static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	const struct vga_switcheroo_client_ops *ops,
 	enum vga_switcheroo_client_id id) { return 0; }
-- 
cgit v1.2.3


From 9a8928359736ab170303ee8a2cc15db54e3a4a8f Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 15 Oct 2015 14:44:38 +0300
Subject: net/mlx4_core: Add support for filtering multicast loopback

Update device capabilities regarding HW filtering multicast loopback support.

Add MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute to mlx4_update_qp to
enable changing QP context to support filtering incoming multicast
loopback traffic according to the sender's counter index.

Set the corresponding bits in QP context to force the loopback source
checks if attribute is given and HW supports it.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c            |  6 +++++
 drivers/net/ethernet/mellanox/mlx4/qp.c            | 19 +++++++++++++-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 30 +++++++++++++++++-----
 include/linux/mlx4/device.h                        |  2 ++
 include/linux/mlx4/qp.h                            | 24 +++++++++++++----
 5 files changed, 68 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index e8ec1dec5789..b3be3a060311 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[27] = "Port beacon support",
 		[28] = "RX-ALL support",
 		[29] = "802.1ad offload support",
+		[31] = "Modifying loopback source checks using UPDATE_QP support",
+		[32] = "Loopback source checks support",
 	};
 	int i;
 
@@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+	if (field32 & (1 << 18))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB;
+	if (field32 & (1 << 19))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
 	if (field32 & (1 << 26))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
 	if (field32 & (1 << 20))
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 20268634a9ab..b16249577aa2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
 	}
 
+	if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) {
+		if (!(dev->caps.flags2
+		      & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+			mlx4_warn(dev,
+				  "Trying to set src check LB, but it isn't supported\n");
+			err = -ENOTSUPP;
+			goto out;
+		}
+		pri_addr_path_mask |=
+			1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB;
+		if (params->flags &
+		    MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) {
+			cmd->qp_context.pri_path.fl |=
+				MLX4_FL_ETH_SRC_CHECK_MC_LB;
+		}
+	}
+
 	if (attr & MLX4_UPDATE_QP_VSD) {
 		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
 		if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
@@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 	err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
 		       MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
 		       MLX4_CMD_NATIVE);
-
+out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 731423ca575d..502f3350088e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 			}
 		}
 
+		/* preserve IF_COUNTER flag */
+		qpc->pri_path.vlan_control &=
+			MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
 		if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
 		    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
-			qpc->pri_path.vlan_control =
+			qpc->pri_path.vlan_control |=
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
@@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
 		} else if (0 != vp_oper->state.default_vlan) {
-			qpc->pri_path.vlan_control =
+			qpc->pri_path.vlan_control |=
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
 		} else { /* priority tagged */
-			qpc->pri_path.vlan_control =
+			qpc->pri_path.vlan_control |=
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
 		}
@@ -3762,9 +3765,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
 	update_gid(dev, inbox, (u8)slave);
 	adjust_proxy_tun_qkey(dev, vhcr, qpc);
 	orig_sched_queue = qpc->pri_path.sched_queue;
-	err = update_vport_qp_param(dev, inbox, slave, qpn);
-	if (err)
-		return err;
 
 	err = get_res(dev, slave, qpn, RES_QP, &qp);
 	if (err)
@@ -3774,6 +3774,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
 		goto out;
 	}
 
+	err = update_vport_qp_param(dev, inbox, slave, qpn);
+	if (err)
+		goto out;
+
 	err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
 out:
 	/* if no error, save sched queue value passed in by VF. This is
@@ -4208,7 +4212,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave,
 
 }
 
-#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)
+#define MLX4_UPD_QP_PATH_MASK_SUPPORTED      (                                \
+	1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX                     |\
+	1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)
 int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_vhcr *vhcr,
 			   struct mlx4_cmd_mailbox *inbox,
@@ -4231,6 +4237,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
 	    (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
 		return -EPERM;
 
+	if ((pri_addr_path_mask &
+	     (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
+		!(dev->caps.flags2 &
+		  MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+			mlx4_warn(dev,
+				  "Src check LB for slave %d isn't supported\n",
+				   slave);
+		return -ENOTSUPP;
+	}
+
 	/* Just change the smac for the QP */
 	err = get_res(dev, slave, qpn, RES_QP, &rqp);
 	if (err) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index baad4cb8e9b0..dac6872dbaea 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -214,6 +214,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_IGNORE_FCS		= 1LL <<  28,
 	MLX4_DEV_CAP_FLAG2_PHV_EN		= 1LL <<  29,
 	MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN	= 1LL <<  30,
+	MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
+	MLX4_DEV_CAP_FLAG2_LB_SRC_CHK           = 1ULL << 32,
 };
 
 enum {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index de45a51b3f04..fe052e234906 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -135,7 +135,10 @@ struct mlx4_rss_context {
 
 struct mlx4_qp_path {
 	u8			fl;
-	u8			vlan_control;
+	union {
+		u8			vlan_control;
+		u8			control;
+	};
 	u8			disable_pkey_check;
 	u8			pkey_index;
 	u8			counter_index;
@@ -156,9 +159,16 @@ struct mlx4_qp_path {
 };
 
 enum { /* fl */
-	MLX4_FL_CV      = 1 << 6,
-	MLX4_FL_ETH_HIDE_CQE_VLAN       = 1 << 2
+	MLX4_FL_CV	= 1 << 6,
+	MLX4_FL_ETH_HIDE_CQE_VLAN	= 1 << 2,
+	MLX4_FL_ETH_SRC_CHECK_MC_LB	= 1 << 1,
+	MLX4_FL_ETH_SRC_CHECK_UC_LB	= 1 << 0,
 };
+
+enum { /* control */
+	MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER	= 1 << 7,
+};
+
 enum { /* vlan_control */
 	MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED	= 1 << 6,
 	MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED	= 1 << 5, /* 802.1p priority tag */
@@ -254,6 +264,8 @@ enum {
 	MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE		= 14 + 32,
 	MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX		= 15 + 32,
 	MLX4_UPD_QP_PATH_MASK_FVL_RX			= 16 + 32,
+	MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB	= 18 + 32,
+	MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB	= 19 + 32,
 };
 
 enum { /* param3 */
@@ -436,11 +448,13 @@ enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_VSD		= 1 << 1,
 	MLX4_UPDATE_QP_RATE_LIMIT	= 1 << 2,
 	MLX4_UPDATE_QP_QOS_VPORT	= 1 << 3,
-	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 4) - 1
+	MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB      = 1 << 4,
+	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 5) - 1
 };
 
 enum mlx4_update_qp_params_flags {
-	MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE		= 1 << 0,
+	MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB     = 1 << 0,
+	MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE		= 1 << 1,
 };
 
 struct mlx4_update_qp_params {
-- 
cgit v1.2.3


From 62a615e083604d291af0cb18f9b4549531ea4f94 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Oct 2015 12:16:41 +0300
Subject: mfd: core: redo ACPI matching of the children devices

There is at least one board on the market, i.e. Intel Galileo Gen2, that uses
_ADR to distinguish the devices under one actual device. Due to this we have to
improve the quirk in the MFD core to handle that board.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/acpi/enumeration.txt | 11 +++++---
 drivers/mfd/mfd-core.c             | 52 ++++++++++++++++++++++++++------------
 include/linux/mfd/core.h           | 10 ++++++--
 3 files changed, 52 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
index b731b292e812..a91ec5af52df 100644
--- a/Documentation/acpi/enumeration.txt
+++ b/Documentation/acpi/enumeration.txt
@@ -347,13 +347,18 @@ For the first case, the MFD drivers do not need to do anything. The
 resulting child platform device will have its ACPI_COMPANION() set to point
 to the parent device.
 
-If the ACPI namespace has a device that we can match using an ACPI id,
-the id should be set like:
+If the ACPI namespace has a device that we can match using an ACPI id or ACPI
+adr, the cell should be set like:
+
+	static struct mfd_cell_acpi_match my_subdevice_cell_acpi_match = {
+		.pnpid = "XYZ0001",
+		.adr = 0,
+	};
 
 	static struct mfd_cell my_subdevice_cell = {
 		.name = "my_subdevice",
 		/* set the resources relative to the parent */
-		.acpi_pnpid = "XYZ0001",
+		.acpi_match = &my_subdevice_cell_acpi_match,
 	};
 
 The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index c17635d3e504..60b60dc63ddd 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -82,29 +82,49 @@ static int mfd_platform_add_cell(struct platform_device *pdev,
 static void mfd_acpi_add_device(const struct mfd_cell *cell,
 				struct platform_device *pdev)
 {
-	struct acpi_device *parent_adev;
+	const struct mfd_cell_acpi_match *match = cell->acpi_match;
+	struct acpi_device *parent, *child;
 	struct acpi_device *adev;
 
-	parent_adev = ACPI_COMPANION(pdev->dev.parent);
-	if (!parent_adev)
+	parent = ACPI_COMPANION(pdev->dev.parent);
+	if (!parent)
 		return;
 
 	/*
-	 * MFD child device gets its ACPI handle either from the ACPI
-	 * device directly under the parent that matches the acpi_pnpid or
-	 * it will use the parent handle if is no acpi_pnpid is given.
+	 * MFD child device gets its ACPI handle either from the ACPI device
+	 * directly under the parent that matches the either _HID or _CID, or
+	 * _ADR or it will use the parent handle if is no ID is given.
+	 *
+	 * Note that use of _ADR is a grey area in the ACPI specification,
+	 * though Intel Galileo Gen2 is using it to distinguish the children
+	 * devices.
 	 */
-	adev = parent_adev;
-	if (cell->acpi_pnpid) {
-		struct acpi_device_id ids[2] = {};
-		struct acpi_device *child_adev;
-
-		strlcpy(ids[0].id, cell->acpi_pnpid, sizeof(ids[0].id));
-		list_for_each_entry(child_adev, &parent_adev->children, node)
-			if (acpi_match_device_ids(child_adev, ids)) {
-				adev = child_adev;
-				break;
+	adev = parent;
+	if (match) {
+		if (match->pnpid) {
+			struct acpi_device_id ids[2] = {};
+
+			strlcpy(ids[0].id, match->pnpid, sizeof(ids[0].id));
+			list_for_each_entry(child, &parent->children, node) {
+				if (acpi_match_device_ids(child, ids)) {
+					adev = child;
+					break;
+				}
+			}
+		} else {
+			unsigned long long adr;
+			acpi_status status;
+
+			list_for_each_entry(child, &parent->children, node) {
+				status = acpi_evaluate_integer(child->handle,
+							       "_ADR", NULL,
+							       &adr);
+				if (ACPI_SUCCESS(status) && match->adr == adr) {
+					adev = child;
+					break;
+				}
 			}
+		}
 	}
 
 	ACPI_COMPANION_SET(&pdev->dev, adev);
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index a76bc100bf97..27dac3ff18b9 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -18,6 +18,12 @@
 
 struct irq_domain;
 
+/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */
+struct mfd_cell_acpi_match {
+	const char			*pnpid;
+	const unsigned long long	adr;
+};
+
 /*
  * This struct describes the MFD part ("cell").
  * After registration the copy of this structure will become the platform data
@@ -44,8 +50,8 @@ struct mfd_cell {
 	 */
 	const char		*of_compatible;
 
-	/* Matches ACPI PNP id, either _HID or _CID */
-	const char		*acpi_pnpid;
+	/* Matches ACPI */
+	const struct mfd_cell_acpi_match	*acpi_match;
 
 	/*
 	 * These resources can be specified relative to the parent device.
-- 
cgit v1.2.3


From 0d0f4aab4e4d290138a4ae7f2ef8469e48c9a669 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Wed, 7 Oct 2015 14:39:55 +0300
Subject: lockd: get rid of reference-counted NSM RPC clients

Currently we have a reference-counted per-net NSM RPC client
which is created on the first monitor request and destroyed
after the last unmonitor request. It's needed because the
RPC client needs to know 'utsname()->nodename', but utsname()
might be NULL when nsm_unmonitor() is called.

So instead of holding the rpc client we could just save nodename
in struct nlm_host and pass it to the rpc_create().
Thus there is no need to keep the rpc client until the last
unmonitor request. We could create separate RPC clients
for each monitor/unmonitor request.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/host.c             |  1 +
 fs/lockd/mon.c              | 89 ++++++++-------------------------------------
 fs/lockd/netns.h            |  3 --
 fs/lockd/svc.c              |  1 -
 include/linux/lockd/lockd.h |  1 +
 5 files changed, 17 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index b5f3c3ab0d5f..d716c9993a26 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 	host->h_nsmhandle  = nsm;
 	host->h_addrbuf    = nsm->sm_addrbuf;
 	host->net	   = ni->net;
+	strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
 	return host;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6c05cd17e520..19166d4a8d31 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -42,7 +42,7 @@ struct nsm_args {
 	u32			proc;
 
 	char			*mon_name;
-	char			*nodename;
+	const char		*nodename;
 };
 
 struct nsm_res {
@@ -86,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
 	return rpc_create(&args);
 }
 
-static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
-		struct rpc_clnt *clnt)
-{
-	spin_lock(&ln->nsm_clnt_lock);
-	if (ln->nsm_users == 0) {
-		if (clnt == NULL)
-			goto out;
-		ln->nsm_clnt = clnt;
-	}
-	clnt = ln->nsm_clnt;
-	ln->nsm_users++;
-out:
-	spin_unlock(&ln->nsm_clnt_lock);
-	return clnt;
-}
-
-static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename)
-{
-	struct rpc_clnt	*clnt, *new;
-	struct lockd_net *ln = net_generic(net, lockd_net_id);
-
-	clnt = nsm_client_set(ln, NULL);
-	if (clnt != NULL)
-		goto out;
-
-	clnt = new = nsm_create(net, nodename);
-	if (IS_ERR(clnt))
-		goto out;
-
-	clnt = nsm_client_set(ln, new);
-	if (clnt != new)
-		rpc_shutdown_client(new);
-out:
-	return clnt;
-}
-
-static void nsm_client_put(struct net *net)
-{
-	struct lockd_net *ln = net_generic(net, lockd_net_id);
-	struct rpc_clnt	*clnt = NULL;
-
-	spin_lock(&ln->nsm_clnt_lock);
-	ln->nsm_users--;
-	if (ln->nsm_users == 0) {
-		clnt = ln->nsm_clnt;
-		ln->nsm_clnt = NULL;
-	}
-	spin_unlock(&ln->nsm_clnt_lock);
-	if (clnt != NULL)
-		rpc_shutdown_client(clnt);
-}
-
 static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
-			 struct rpc_clnt *clnt)
+			 const struct nlm_host *host)
 {
 	int		status;
+	struct rpc_clnt *clnt;
 	struct nsm_args args = {
 		.priv		= &nsm->sm_priv,
 		.prog		= NLM_PROGRAM,
 		.vers		= 3,
 		.proc		= NLMPROC_NSM_NOTIFY,
 		.mon_name	= nsm->sm_mon_name,
-		.nodename	= clnt->cl_nodename,
+		.nodename	= host->nodename,
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &args,
@@ -157,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
 
 	memset(res, 0, sizeof(*res));
 
+	clnt = nsm_create(host->net, host->nodename);
+	if (IS_ERR(clnt)) {
+		dprintk("lockd: failed to create NSM upcall transport, "
+			"status=%ld, net=%p\n", PTR_ERR(clnt), host->net);
+		return PTR_ERR(clnt);
+	}
+
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
 	if (status == -ECONNREFUSED) {
@@ -170,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
 				status);
 	else
 		status = 0;
+
+	rpc_shutdown_client(clnt);
 	return status;
 }
 
@@ -189,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host)
 	struct nsm_handle *nsm = host->h_nsmhandle;
 	struct nsm_res	res;
 	int		status;
-	struct rpc_clnt *clnt;
-	const char *nodename = NULL;
 
 	dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
 
 	if (nsm->sm_monitored)
 		return 0;
 
-	if (host->h_rpcclnt)
-		nodename = host->h_rpcclnt->cl_nodename;
-
 	/*
 	 * Choose whether to record the caller_name or IP address of
 	 * this peer in the local rpc.statd's database.
 	 */
 	nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
 
-	clnt = nsm_client_get(host->net, nodename);
-	if (IS_ERR(clnt)) {
-		status = PTR_ERR(clnt);
-		dprintk("lockd: failed to create NSM upcall transport, "
-				"status=%d, net=%p\n", status, host->net);
-		return status;
-	}
-
-	status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
+	status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host);
 	if (unlikely(res.status != 0))
 		status = -EIO;
 	if (unlikely(status < 0)) {
@@ -246,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host)
 
 	if (atomic_read(&nsm->sm_count) == 1
 	 && nsm->sm_monitored && !nsm->sm_sticky) {
-		struct lockd_net *ln = net_generic(host->net, lockd_net_id);
-
 		dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
 
-		status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
+		status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host);
 		if (res.status != 0)
 			status = -EIO;
 		if (status < 0)
@@ -258,8 +201,6 @@ void nsm_unmonitor(const struct nlm_host *host)
 					nsm->sm_name);
 		else
 			nsm->sm_monitored = 0;
-
-		nsm_client_put(host->net);
 	}
 }
 
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 89fe011b1335..5426189406c1 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -12,9 +12,6 @@ struct lockd_net {
 	struct delayed_work grace_period_end;
 	struct lock_manager lockd_manager;
 
-	spinlock_t nsm_clnt_lock;
-	unsigned int nsm_users;
-	struct rpc_clnt *nsm_clnt;
 	struct list_head nsm_handles;
 };
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 0dff13f41808..5f31ebd96c06 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -592,7 +592,6 @@ static int lockd_init_net(struct net *net)
 	INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
 	INIT_LIST_HEAD(&ln->lockd_manager.list);
 	ln->lockd_manager.block_opens = false;
-	spin_lock_init(&ln->nsm_clnt_lock);
 	INIT_LIST_HEAD(&ln->nsm_handles);
 	return 0;
 }
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index fd3b65bf51b5..c15373894a42 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -68,6 +68,7 @@ struct nlm_host {
 	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
 	char			*h_addrbuf;	/* address eyecatcher */
 	struct net		*net;		/* host net */
+	char			nodename[UNX_MAXNODENAME + 1];
 };
 
 /*
-- 
cgit v1.2.3


From 778620364ef525e83597a6edee4d0a69db67fd3d Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.com>
Date: Fri, 16 Oct 2015 08:59:08 +1100
Subject: sunrpc/cache: make cache flushing more reliable.

The caches used to store sunrpc authentication information can be
flushed by writing a timestamp to a file in /proc.

This timestamp has a one-second resolution and any entry in cache that
was last_refreshed *before* that time is treated as expired.

This is problematic as it is not possible to reliably flush the cache
without interrupting NFS service.
If the current time is written to the "flush" file, any entry that was
added since the current second started will still be treated as valid.
If one second beyond the current time is written to the file
then no entries can be valid until the second ticks over.  This will
mean that no NFS request will be handled for up to 1 second.

To resolve this issue we make two changes:

1/ treat an entry as expired if the timestamp when it was last_refreshed
  is before *or the same as* the expiry time.  This means that current
  code which writes out the current time will now flush the cache
  reliably.

2/ when a new entry is added to the cache -  set the last_refresh timestamp
  to 1 second *beyond* the current flush time, when that is not in the
  past.
  This ensures that newly added entries will always be valid.

Now that we have a very reliable way to flush the cache, and also
since we are using "since-boot" timestamps which are monotonic,
change cache_purge() to set the smallest future flush_time which
will work, and leave it there: don't revert to '1'.

Also disable the setting of the 'flush_time' far into the future.
That has never been useful and is now awkward as it would cause
last_refresh times to be strange.
Finally: if a request is made to set the 'flush_time' to the current
second, assume the intent is to flush the cache and advance it, if
necessary, to 1 second beyond the current 'flush_time' so that all
active entries will be deemed to be expired.

As part of this we need to add a 'cache_detail' arg to cache_init()
and cache_fresh_locked() so they can find the current ->flush_time.

Signed-off-by: NeilBrown <neilb@suse.com>
Reported-by: Olaf Kirch <okir@suse.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 16 ++++++++-----
 net/sunrpc/cache.c           | 53 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 51 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 03d3b4c92d9f..ed03c9f7f908 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -48,8 +48,10 @@
 struct cache_head {
 	struct hlist_node	cache_list;
 	time_t		expiry_time;	/* After time time, don't use the data */
-	time_t		last_refresh;   /* If CACHE_PENDING, this is when upcall 
-					 * was sent, else this is when update was received
+	time_t		last_refresh;   /* If CACHE_PENDING, this is when upcall was
+					 * sent, else this is when update was
+					 * received, though it is always set to
+					 * be *after* ->flush_time.
 					 */
 	struct kref	ref;
 	unsigned long	flags;
@@ -105,8 +107,12 @@ struct cache_detail {
 	/* fields below this comment are for internal use
 	 * and should not be touched by cache owners
 	 */
-	time_t			flush_time;		/* flush all cache items with last_refresh
-							 * earlier than this */
+	time_t			flush_time;		/* flush all cache items with
+							 * last_refresh at or earlier
+							 * than this.  last_refresh
+							 * is never set at or earlier
+							 * than this.
+							 */
 	struct list_head	others;
 	time_t			nextcheck;
 	int			entries;
@@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
 	return  (h->expiry_time < seconds_since_boot()) ||
-		(detail->flush_time > h->last_refresh);
+		(detail->flush_time >= h->last_refresh);
 }
 
 extern int cache_check(struct cache_detail *detail,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..5e4f815c2b34 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
 static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
 
-static void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h, struct cache_detail *detail)
 {
 	time_t now = seconds_since_boot();
 	INIT_HLIST_NODE(&h->cache_list);
 	h->flags = 0;
 	kref_init(&h->ref);
 	h->expiry_time = now + CACHE_NEW_EXPIRY;
+	if (now <= detail->flush_time)
+		/* ensure it isn't already expired */
+		now = detail->flush_time + 1;
 	h->last_refresh = now;
 }
 
@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 	 * we might get lose if we need to
 	 * cache_put it soon.
 	 */
-	cache_init(new);
+	cache_init(new, detail);
 	detail->init(new, key);
 
 	write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
 
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
-static void cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+			       struct cache_detail *detail)
 {
+	time_t now = seconds_since_boot();
+	if (now <= detail->flush_time)
+		/* ensure it isn't immediately treated as expired */
+		now = detail->flush_time + 1;
 	head->expiry_time = expiry;
-	head->last_refresh = seconds_since_boot();
+	head->last_refresh = now;
 	smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
 	set_bit(CACHE_VALID, &head->flags);
 }
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
 				detail->update(old, new);
-			cache_fresh_locked(old, new->expiry_time);
+			cache_fresh_locked(old, new->expiry_time, detail);
 			write_unlock(&detail->hash_lock);
 			cache_fresh_unlocked(old, detail);
 			return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 		cache_put(old, detail);
 		return NULL;
 	}
-	cache_init(tmp);
+	cache_init(tmp, detail);
 	detail->init(tmp, old);
 
 	write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
 	detail->entries++;
 	cache_get(tmp);
-	cache_fresh_locked(tmp, new->expiry_time);
-	cache_fresh_locked(old, 0);
+	cache_fresh_locked(tmp, new->expiry_time, detail);
+	cache_fresh_locked(old, 0, detail);
 	write_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(tmp, detail);
 	cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 	rv = cache_is_valid(h);
 	if (rv == -EAGAIN) {
 		set_bit(CACHE_NEGATIVE, &h->flags);
-		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
+				   detail);
 		rv = -ENOENT;
 	}
 	write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
 
 void cache_purge(struct cache_detail *detail)
 {
-	detail->flush_time = LONG_MAX;
+	time_t now = seconds_since_boot();
+	if (detail->flush_time >= now)
+		now = detail->flush_time + 1;
+	/* 'now' is the maximum value any 'last_refresh' can have */
+	detail->flush_time = now;
 	detail->nextcheck = seconds_since_boot();
 	cache_flush();
-	detail->flush_time = 1;
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
 {
 	char tbuf[20];
 	char *bp, *ep;
+	time_t then, now;
 
 	if (*ppos || count > sizeof(tbuf)-1)
 		return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
 		return -EINVAL;
 
 	bp = tbuf;
-	cd->flush_time = get_expiry(&bp);
-	cd->nextcheck = seconds_since_boot();
+	then = get_expiry(&bp);
+	now = seconds_since_boot();
+	cd->nextcheck = now;
+	/* Can only set flush_time to 1 second beyond "now", or
+	 * possibly 1 second beyond flushtime.  This is because
+	 * flush_time never goes backwards so it mustn't get too far
+	 * ahead of time.
+	 */
+	if (then >= now) {
+		/* Want to flush everything, so behave like cache_purge() */
+		if (cd->flush_time >= now)
+			now = cd->flush_time + 1;
+		then = now;
+	}
+
+	cd->flush_time = then;
 	cache_flush();
 
 	*ppos += count;
-- 
cgit v1.2.3


From c0e5c4450494d74c8deb4f47ddcbb74c94937e20 Mon Sep 17 00:00:00 2001
From: Dustin Byford <dustin@cumulusnetworks.com>
Date: Fri, 23 Oct 2015 12:27:06 -0700
Subject: acpi: add acpi_preset_companion() stub

Add a stub for acpi_preset_companion().  Fixes build failures when
acpi_preset_companion() is used and CONFIG_ACPI is not set.

Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Dustin Byford <dustin@cumulusnetworks.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/acpi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 43856d19cf4d..43b55e751dea 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -477,6 +477,11 @@ static inline bool has_acpi_companion(struct device *dev)
 	return false;
 }
 
+static inline void acpi_preset_companion(struct device *dev,
+					 struct acpi_device *parent, u64 addr)
+{
+}
+
 static inline const char *acpi_dev_name(struct acpi_device *adev)
 {
 	return NULL;
-- 
cgit v1.2.3


From d787dcdb9c8f412b1dd0727f90d3f793a61a2551 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Fri, 23 Oct 2015 20:41:31 +0200
Subject: bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus

Reduced Serial Bus (RSB) is an Allwinner proprietary interface
used to communicate with PMICs and other peripheral ICs.

RSB is a two-wire push-pull serial bus that supports 1 master
device and up to 15 active slave devices.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/bus/Kconfig       |  11 +
 drivers/bus/Makefile      |   1 +
 drivers/bus/sunxi-rsb.c   | 783 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sunxi-rsb.h | 105 +++++++
 4 files changed, 900 insertions(+)
 create mode 100644 drivers/bus/sunxi-rsb.c
 create mode 100644 include/linux/sunxi-rsb.h

(limited to 'include/linux')

diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 1a82f3a17681..78382de19ed9 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -121,6 +121,17 @@ config SIMPLE_PM_BUS
 	  Controller (BSC, sometimes called "LBSC within Bus Bridge", or
 	  "External Bus Interface") as found on several Renesas ARM SoCs.
 
+config SUNXI_RSB
+	tristate "Allwinner sunXi Reduced Serial Bus Driver"
+	  default MACH_SUN8I || MACH_SUN9I
+	  depends on ARCH_SUNXI
+	  select REGMAP
+	  help
+	  Say y here to enable support for Allwinner's Reduced Serial Bus
+	  (RSB). This controller is responsible for communicating
+	  with various RSB based devices, such as AXP223, AXP8XX PMICs,
+	  and AC100/AC200 ICs.
+
 config VEXPRESS_CONFIG
 	bool "Versatile Express configuration bus"
 	default y if ARCH_VEXPRESS
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 790e7b933fb2..fcb9f9794a1f 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -15,5 +15,6 @@ obj-$(CONFIG_MVEBU_MBUS) 	+= mvebu-mbus.o
 obj-$(CONFIG_OMAP_INTERCONNECT)	+= omap_l3_smx.o omap_l3_noc.o
 
 obj-$(CONFIG_OMAP_OCP2SCP)	+= omap-ocp2scp.o
+obj-$(CONFIG_SUNXI_RSB)		+= sunxi-rsb.o
 obj-$(CONFIG_SIMPLE_PM_BUS)	+= simple-pm-bus.o
 obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
new file mode 100644
index 000000000000..846bc29c157d
--- /dev/null
+++ b/drivers/bus/sunxi-rsb.c
@@ -0,0 +1,783 @@
+/*
+ * RSB (Reduced Serial Bus) driver.
+ *
+ * Author: Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ * The RSB controller looks like an SMBus controller which only supports
+ * byte and word data transfers. But, it differs from standard SMBus
+ * protocol on several aspects:
+ * - it uses addresses set at runtime to address slaves. Runtime addresses
+ *   are sent to slaves using their 12bit hardware addresses. Up to 15
+ *   runtime addresses are available.
+ * - it adds a parity bit every 8bits of data and address for read and
+ *   write accesses; this replaces the ack bit
+ * - only one read access is required to read a byte (instead of a write
+ *   followed by a read access in standard SMBus protocol)
+ * - there's no Ack bit after each read access
+ *
+ * This means this bus cannot be used to interface with standard SMBus
+ * devices. Devices known to support this interface include the AXP223,
+ * AXP809, and AXP806 PMICs, and the AC100 audio codec, all from X-Powers.
+ *
+ * A description of the operation and wire protocol can be found in the
+ * RSB section of Allwinner's A80 user manual, which can be found at
+ *
+ *     https://github.com/allwinner-zh/documents/tree/master/A80
+ *
+ * This document is officially released by Allwinner.
+ *
+ * This driver is based on i2c-sun6i-p2wi.c, the P2WI bus driver.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/clk/clk-conf.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/sunxi-rsb.h>
+#include <linux/types.h>
+
+/* RSB registers */
+#define RSB_CTRL	0x0	/* Global control */
+#define RSB_CCR		0x4	/* Clock control */
+#define RSB_INTE	0x8	/* Interrupt controls */
+#define RSB_INTS	0xc	/* Interrupt status */
+#define RSB_ADDR	0x10	/* Address to send with read/write command */
+#define RSB_DATA	0x1c	/* Data to read/write */
+#define RSB_LCR		0x24	/* Line control */
+#define RSB_DMCR	0x28	/* Device mode (init) control */
+#define RSB_CMD		0x2c	/* RSB Command */
+#define RSB_DAR		0x30	/* Device address / runtime address */
+
+/* CTRL fields */
+#define RSB_CTRL_START_TRANS		BIT(7)
+#define RSB_CTRL_ABORT_TRANS		BIT(6)
+#define RSB_CTRL_GLOBAL_INT_ENB		BIT(1)
+#define RSB_CTRL_SOFT_RST		BIT(0)
+
+/* CLK CTRL fields */
+#define RSB_CCR_SDA_OUT_DELAY(v)	(((v) & 0x7) << 8)
+#define RSB_CCR_MAX_CLK_DIV		0xff
+#define RSB_CCR_CLK_DIV(v)		((v) & RSB_CCR_MAX_CLK_DIV)
+
+/* STATUS fields */
+#define RSB_INTS_TRANS_ERR_ACK		BIT(16)
+#define RSB_INTS_TRANS_ERR_DATA_BIT(v)	(((v) >> 8) & 0xf)
+#define RSB_INTS_TRANS_ERR_DATA		GENMASK(11, 8)
+#define RSB_INTS_LOAD_BSY		BIT(2)
+#define RSB_INTS_TRANS_ERR		BIT(1)
+#define RSB_INTS_TRANS_OVER		BIT(0)
+
+/* LINE CTRL fields */
+#define RSB_LCR_SCL_STATE		BIT(5)
+#define RSB_LCR_SDA_STATE		BIT(4)
+#define RSB_LCR_SCL_CTL			BIT(3)
+#define RSB_LCR_SCL_CTL_EN		BIT(2)
+#define RSB_LCR_SDA_CTL			BIT(1)
+#define RSB_LCR_SDA_CTL_EN		BIT(0)
+
+/* DEVICE MODE CTRL field values */
+#define RSB_DMCR_DEVICE_START		BIT(31)
+#define RSB_DMCR_MODE_DATA		(0x7c << 16)
+#define RSB_DMCR_MODE_REG		(0x3e << 8)
+#define RSB_DMCR_DEV_ADDR		0x00
+
+/* CMD values */
+#define RSB_CMD_RD8			0x8b
+#define RSB_CMD_RD16			0x9c
+#define RSB_CMD_RD32			0xa6
+#define RSB_CMD_WR8			0x4e
+#define RSB_CMD_WR16			0x59
+#define RSB_CMD_WR32			0x63
+#define RSB_CMD_STRA			0xe8
+
+/* DAR fields */
+#define RSB_DAR_RTA(v)			(((v) & 0xff) << 16)
+#define RSB_DAR_DA(v)			((v) & 0xffff)
+
+#define RSB_MAX_FREQ			20000000
+
+#define RSB_CTRL_NAME			"sunxi-rsb"
+
+struct sunxi_rsb_addr_map {
+	u16 hwaddr;
+	u8 rtaddr;
+};
+
+struct sunxi_rsb {
+	struct device *dev;
+	void __iomem *regs;
+	struct clk *clk;
+	struct reset_control *rstc;
+	struct completion complete;
+	struct mutex lock;
+	unsigned int status;
+};
+
+/* bus / slave device related functions */
+static struct bus_type sunxi_rsb_bus;
+
+static int sunxi_rsb_device_match(struct device *dev, struct device_driver *drv)
+{
+	return of_driver_match_device(dev, drv);
+}
+
+static int sunxi_rsb_device_probe(struct device *dev)
+{
+	const struct sunxi_rsb_driver *drv = to_sunxi_rsb_driver(dev->driver);
+	struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev);
+	int ret;
+
+	if (!drv->probe)
+		return -ENODEV;
+
+	if (!rdev->irq) {
+		int irq = -ENOENT;
+
+		if (dev->of_node)
+			irq = of_irq_get(dev->of_node, 0);
+
+		if (irq == -EPROBE_DEFER)
+			return irq;
+		if (irq < 0)
+			irq = 0;
+
+		rdev->irq = irq;
+	}
+
+	ret = of_clk_set_defaults(dev->of_node, false);
+	if (ret < 0)
+		return ret;
+
+	return drv->probe(rdev);
+}
+
+static int sunxi_rsb_device_remove(struct device *dev)
+{
+	const struct sunxi_rsb_driver *drv = to_sunxi_rsb_driver(dev->driver);
+
+	return drv->remove(to_sunxi_rsb_device(dev));
+}
+
+static struct bus_type sunxi_rsb_bus = {
+	.name		= RSB_CTRL_NAME,
+	.match		= sunxi_rsb_device_match,
+	.probe		= sunxi_rsb_device_probe,
+	.remove		= sunxi_rsb_device_remove,
+};
+
+static void sunxi_rsb_dev_release(struct device *dev)
+{
+	struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev);
+
+	kfree(rdev);
+}
+
+/**
+ * sunxi_rsb_device_create() - allocate and add an RSB device
+ * @rsb:	RSB controller
+ * @node:	RSB slave device node
+ * @hwaddr:	RSB slave hardware address
+ * @rtaddr:	RSB slave runtime address
+ *
+ * Return: the registered device, or an ERR_PTR() on failure.
+ */
+static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb,
+		struct device_node *node, u16 hwaddr, u8 rtaddr)
+{
+	int err;
+	struct sunxi_rsb_device *rdev;
+
+	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
+	if (!rdev)
+		return ERR_PTR(-ENOMEM);
+
+	rdev->rsb = rsb;
+	rdev->hwaddr = hwaddr;
+	rdev->rtaddr = rtaddr;
+	rdev->dev.bus = &sunxi_rsb_bus;
+	rdev->dev.parent = rsb->dev;
+	rdev->dev.of_node = node;
+	rdev->dev.release = sunxi_rsb_dev_release;
+
+	dev_set_name(&rdev->dev, "%s-%x", RSB_CTRL_NAME, hwaddr);
+
+	err = device_register(&rdev->dev);
+	if (err < 0) {
+		dev_err(&rdev->dev, "Can't add %s, status %d\n",
+			dev_name(&rdev->dev), err);
+		/* drop the initial reference; the release callback frees rdev */
+		put_device(&rdev->dev);
+		return ERR_PTR(err);
+	}
+
+	dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev));
+	return rdev;
+}
+
+/**
+ * sunxi_rsb_device_unregister() - unregister an RSB device
+ * @rdev:	rsb_device to be removed
+ */
+static void sunxi_rsb_device_unregister(struct sunxi_rsb_device *rdev)
+{
+	device_unregister(&rdev->dev);
+}
+
+static int sunxi_rsb_remove_devices(struct device *dev, void *data)
+{
+	struct sunxi_rsb_device *rdev = to_sunxi_rsb_device(dev);
+
+	if (dev->bus == &sunxi_rsb_bus)
+		sunxi_rsb_device_unregister(rdev);
+
+	return 0;
+}
+
+/**
+ * sunxi_rsb_driver_register() - Register device driver with RSB core
+ * @rdrv:	device driver to be associated with slave-device.
+ *
+ * This API will register the client driver with the RSB framework.
+ * It is typically called from the driver's module-init function.
+ */
+int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv)
+{
+	rdrv->driver.bus = &sunxi_rsb_bus;
+	return driver_register(&rdrv->driver);
+}
+EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register);
+
+/* common code that starts a transfer */
+static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb)
+{
+	if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) {
+		dev_dbg(rsb->dev, "RSB transfer still in progress\n");
+		return -EBUSY;
+	}
+
+	reinit_completion(&rsb->complete);
+
+	writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER,
+	       rsb->regs + RSB_INTE);
+	writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB,
+	       rsb->regs + RSB_CTRL);
+
+	if (!wait_for_completion_io_timeout(&rsb->complete,
+					    msecs_to_jiffies(100))) {
+		dev_dbg(rsb->dev, "RSB timeout\n");
+
+		/* abort the transfer */
+		writel(RSB_CTRL_ABORT_TRANS, rsb->regs + RSB_CTRL);
+
+		/* clear any interrupt flags */
+		writel(readl(rsb->regs + RSB_INTS), rsb->regs + RSB_INTS);
+
+		return -ETIMEDOUT;
+	}
+
+	if (rsb->status & RSB_INTS_LOAD_BSY) {
+		dev_dbg(rsb->dev, "RSB busy\n");
+		return -EBUSY;
+	}
+
+	if (rsb->status & RSB_INTS_TRANS_ERR) {
+		if (rsb->status & RSB_INTS_TRANS_ERR_ACK) {
+			dev_dbg(rsb->dev, "RSB slave nack\n");
+			return -EINVAL;
+		}
+
+		if (rsb->status & RSB_INTS_TRANS_ERR_DATA) {
+			dev_dbg(rsb->dev, "RSB transfer data error\n");
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Issue a @len-byte (1, 2 or 4) read of register @addr on the slave with
+ * runtime address @rtaddr and store RSB_DATA in *@buf. Returns 0 or -errno.
+ */
+static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
+			  u32 *buf, size_t len)
+{
+	u32 cmd;
+	int ret;
+
+	if (!buf)
+		return -EINVAL;
+
+	switch (len) {
+	case 1:
+		cmd = RSB_CMD_RD8;
+		break;
+	case 2:
+		cmd = RSB_CMD_RD16;
+		break;
+	case 4:
+		cmd = RSB_CMD_RD32;
+		break;
+	default:
+		dev_err(rsb->dev, "Invalid access width: %zu\n", len);
+		return -EINVAL;
+	}
+
+	mutex_lock(&rsb->lock);
+	writel(addr, rsb->regs + RSB_ADDR);
+	writel(RSB_DAR_RTA(rtaddr), rsb->regs + RSB_DAR);
+	writel(cmd, rsb->regs + RSB_CMD);
+
+	ret = _sunxi_rsb_run_xfer(rsb);
+	if (!ret)
+		*buf = readl(rsb->regs + RSB_DATA);
+	/* the lock must be released on the failure path as well */
+	mutex_unlock(&rsb->lock);
+
+	return ret;
+}
+
+/* Write *@buf (@len: 1, 2 or 4 bytes) to register @addr of slave @rtaddr. */
+static int sunxi_rsb_write(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
+			   const u32 *buf, size_t len)
+{
+	u32 cmd;
+	int ret;
+
+	if (!buf)
+		return -EINVAL;
+
+	switch (len) {
+	case 1:
+		cmd = RSB_CMD_WR8;
+		break;
+	case 2:
+		cmd = RSB_CMD_WR16;
+		break;
+	case 4:
+		cmd = RSB_CMD_WR32;
+		break;
+	default:
+		dev_err(rsb->dev, "Invalid access width: %zu\n", len);
+		return -EINVAL;
+	}
+
+	mutex_lock(&rsb->lock);
+	writel(addr, rsb->regs + RSB_ADDR);
+	writel(RSB_DAR_RTA(rtaddr), rsb->regs + RSB_DAR);
+	writel(*buf, rsb->regs + RSB_DATA);
+	writel(cmd, rsb->regs + RSB_CMD);
+	ret = _sunxi_rsb_run_xfer(rsb);
+
+	mutex_unlock(&rsb->lock);
+
+	return ret;
+}
+
+/* RSB regmap functions */
+struct sunxi_rsb_ctx {
+	struct sunxi_rsb_device *rdev;
+	int size;
+};
+
+static int regmap_sunxi_rsb_reg_read(void *context, unsigned int reg,
+				     unsigned int *val)
+{
+	struct sunxi_rsb_ctx *ctx = context;
+	struct sunxi_rsb_device *rdev = ctx->rdev;
+
+	if (reg > 0xff)
+		return -EINVAL;
+
+	return sunxi_rsb_read(rdev->rsb, rdev->rtaddr, reg, val, ctx->size);
+}
+
+static int regmap_sunxi_rsb_reg_write(void *context, unsigned int reg,
+				      unsigned int val)
+{
+	struct sunxi_rsb_ctx *ctx = context;
+	struct sunxi_rsb_device *rdev = ctx->rdev;
+
+	return sunxi_rsb_write(rdev->rsb, rdev->rtaddr, reg, &val, ctx->size);
+}
+
+static void regmap_sunxi_rsb_free_ctx(void *context)
+{
+	struct sunxi_rsb_ctx *ctx = context;
+
+	kfree(ctx);
+}
+
+static struct regmap_bus regmap_sunxi_rsb = {
+	.reg_write = regmap_sunxi_rsb_reg_write,
+	.reg_read = regmap_sunxi_rsb_reg_read,
+	.free_context = regmap_sunxi_rsb_free_ctx,
+	.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
+	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+};
+
+static struct sunxi_rsb_ctx *regmap_sunxi_rsb_init_ctx(struct sunxi_rsb_device *rdev,
+		const struct regmap_config *config)
+{
+	struct sunxi_rsb_ctx *ctx;
+
+	switch (config->val_bits) {
+	case 8:
+	case 16:
+	case 32:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ctx->rdev = rdev;
+	ctx->size = config->val_bits / 8;
+
+	return ctx;
+}
+
+struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev,
+					    const struct regmap_config *config,
+					    struct lock_class_key *lock_key,
+					    const char *lock_name)
+{
+	struct sunxi_rsb_ctx *ctx = regmap_sunxi_rsb_init_ctx(rdev, config);
+
+	if (IS_ERR(ctx))
+		return ERR_CAST(ctx);
+
+	return __devm_regmap_init(&rdev->dev, &regmap_sunxi_rsb, ctx, config,
+				  lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_sunxi_rsb);
+
+/* RSB controller driver functions */
+static irqreturn_t sunxi_rsb_irq(int irq, void *dev_id)
+{
+	struct sunxi_rsb *rsb = dev_id;
+	u32 status;
+
+	status = readl(rsb->regs + RSB_INTS);
+	rsb->status = status;
+
+	/* Clear interrupts */
+	status &= (RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR |
+		   RSB_INTS_TRANS_OVER);
+	writel(status, rsb->regs + RSB_INTS);
+
+	complete(&rsb->complete);
+
+	return IRQ_HANDLED;
+}
+
+static int sunxi_rsb_init_device_mode(struct sunxi_rsb *rsb)
+{
+	int ret = 0;
+	u32 reg;
+
+	/* send init sequence */
+	writel(RSB_DMCR_DEVICE_START | RSB_DMCR_MODE_DATA |
+	       RSB_DMCR_MODE_REG | RSB_DMCR_DEV_ADDR, rsb->regs + RSB_DMCR);
+
+	readl_poll_timeout(rsb->regs + RSB_DMCR, reg,
+			   !(reg & RSB_DMCR_DEVICE_START), 100, 250000);
+	if (reg & RSB_DMCR_DEVICE_START)
+		ret = -ETIMEDOUT;
+
+	/* clear interrupt status bits */
+	writel(readl(rsb->regs + RSB_INTS), rsb->regs + RSB_INTS);
+
+	return ret;
+}
+
+/*
+ * There are 15 valid runtime addresses, though Allwinner typically
+ * skips the first, for unknown reasons, and uses the following three.
+ *
+ * 0x17, 0x2d, 0x3a, 0x4e, 0x59, 0x63, 0x74, 0x8b,
+ * 0x9c, 0xa6, 0xb1, 0xc5, 0xd2, 0xe8, 0xff
+ *
+ * No designs with 2 RSB slave devices sharing identical hardware
+ * addresses on the same bus have been seen in the wild. All designs
+ * use 0x2d for the primary PMIC, 0x3a for the secondary PMIC if
+ * there is one, and 0x4e for peripheral ICs.
+ *
+ * The hardware does not seem to support re-setting runtime addresses.
+ * Attempts to do so result in the slave devices returning a NACK.
+ * Hence we just hardcode the mapping here, like Allwinner does.
+ */
+
+static const struct sunxi_rsb_addr_map sunxi_rsb_addr_maps[] = {
+	{ 0x3a3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */
+	{ 0x745, 0x3a }, /* Secondary PMIC: AXP806, ... */
+	{ 0xe89, 0x4e }, /* Peripheral IC: AC100, ... */
+};
+
+static u8 sunxi_rsb_get_rtaddr(u16 hwaddr)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sunxi_rsb_addr_maps); i++)
+		if (hwaddr == sunxi_rsb_addr_maps[i].hwaddr)
+			return sunxi_rsb_addr_maps[i].rtaddr;
+
+	return 0; /* 0 is an invalid runtime address */
+}
+
+static int of_rsb_register_devices(struct sunxi_rsb *rsb)
+{
+	struct device *dev = rsb->dev;
+	struct device_node *child, *np = dev->of_node;
+	u32 hwaddr;
+	u8 rtaddr;
+	int ret;
+
+	if (!np)
+		return -EINVAL;
+
+	/* Runtime addresses for all slaves should be set first */
+	for_each_available_child_of_node(np, child) {
+		dev_dbg(dev, "setting child %s runtime address\n",
+			child->full_name);
+
+		ret = of_property_read_u32(child, "reg", &hwaddr);
+		if (ret) {
+			dev_err(dev, "%s: invalid 'reg' property: %d\n",
+				child->full_name, ret);
+			continue;
+		}
+
+		rtaddr = sunxi_rsb_get_rtaddr(hwaddr);
+		if (!rtaddr) {
+			dev_err(dev, "%s: unknown hardware device address\n",
+				child->full_name);
+			continue;
+		}
+
+		/*
+		 * Since no devices have been registered yet, we are the
+		 * only ones using the bus, we can skip locking the bus.
+		 */
+
+		/* setup command parameters */
+		writel(RSB_CMD_STRA, rsb->regs + RSB_CMD);
+		writel(RSB_DAR_RTA(rtaddr) | RSB_DAR_DA(hwaddr),
+		       rsb->regs + RSB_DAR);
+
+		/* send command */
+		ret = _sunxi_rsb_run_xfer(rsb);
+		if (ret)
+			dev_warn(dev, "%s: set runtime address failed: %d\n",
+				 child->full_name, ret);
+	}
+
+	/* Then we start adding devices and probing them */
+	for_each_available_child_of_node(np, child) {
+		struct sunxi_rsb_device *rdev;
+
+		dev_dbg(dev, "adding child %s\n", child->full_name);
+
+		ret = of_property_read_u32(child, "reg", &hwaddr);
+		if (ret)
+			continue;
+
+		rtaddr = sunxi_rsb_get_rtaddr(hwaddr);
+		if (!rtaddr)
+			continue;
+
+		rdev = sunxi_rsb_device_create(rsb, child, hwaddr, rtaddr);
+		if (IS_ERR(rdev))
+			dev_err(dev, "failed to add child device %s: %ld\n",
+				child->full_name, PTR_ERR(rdev));
+	}
+
+	return 0;
+}
+
+static const struct of_device_id sunxi_rsb_of_match_table[] = {
+	{ .compatible = "allwinner,sun8i-a23-rsb" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table);
+
+static int sunxi_rsb_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *r;
+	struct sunxi_rsb *rsb;
+	unsigned long p_clk_freq;
+	u32 clk_delay, clk_freq = 3000000;
+	int clk_div, irq, ret;
+	u32 reg;
+
+	of_property_read_u32(np, "clock-frequency", &clk_freq);
+	if (clk_freq > RSB_MAX_FREQ) {
+		dev_err(dev,
+			"clock-frequency (%u Hz) is too high (max = 20MHz)\n",
+			clk_freq);
+		return -EINVAL;
+	}
+
+	rsb = devm_kzalloc(dev, sizeof(*rsb), GFP_KERNEL);
+	if (!rsb)
+		return -ENOMEM;
+
+	rsb->dev = dev;
+	platform_set_drvdata(pdev, rsb);
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rsb->regs = devm_ioremap_resource(dev, r);
+	if (IS_ERR(rsb->regs))
+		return PTR_ERR(rsb->regs);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "failed to retrieve irq: %d\n", irq);
+		return irq;
+	}
+
+	rsb->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(rsb->clk)) {
+		ret = PTR_ERR(rsb->clk);
+		dev_err(dev, "failed to retrieve clk: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(rsb->clk);
+	if (ret) {
+		dev_err(dev, "failed to enable clk: %d\n", ret);
+		return ret;
+	}
+
+	p_clk_freq = clk_get_rate(rsb->clk);
+
+	rsb->rstc = devm_reset_control_get(dev, NULL);
+	if (IS_ERR(rsb->rstc)) {
+		ret = PTR_ERR(rsb->rstc);
+		dev_err(dev, "failed to retrieve reset controller: %d\n", ret);
+		goto err_clk_disable;
+	}
+
+	ret = reset_control_deassert(rsb->rstc);
+	if (ret) {
+		dev_err(dev, "failed to deassert reset line: %d\n", ret);
+		goto err_clk_disable;
+	}
+
+	init_completion(&rsb->complete);
+	mutex_init(&rsb->lock);
+
+	/* reset the controller */
+	writel(RSB_CTRL_SOFT_RST, rsb->regs + RSB_CTRL);
+	readl_poll_timeout(rsb->regs + RSB_CTRL, reg,
+			   !(reg & RSB_CTRL_SOFT_RST), 1000, 100000);
+
+	/*
+	 * Clock frequency and delay calculation code is from
+	 * Allwinner U-boot sources.
+	 *
+	 * From A83 user manual:
+	 * bus clock frequency = parent clock frequency / (2 * (divider + 1))
+	 */
+	clk_div = p_clk_freq / clk_freq / 2;
+	if (!clk_div)
+		clk_div = 1;
+	else if (clk_div > RSB_CCR_MAX_CLK_DIV + 1)
+		clk_div = RSB_CCR_MAX_CLK_DIV + 1;
+
+	clk_delay = clk_div >> 1;
+	if (!clk_delay)
+		clk_delay = 1;
+
+	dev_info(dev, "RSB running at %lu Hz\n", p_clk_freq / clk_div / 2);
+	writel(RSB_CCR_SDA_OUT_DELAY(clk_delay) | RSB_CCR_CLK_DIV(clk_div - 1),
+	       rsb->regs + RSB_CCR);
+
+	ret = devm_request_irq(dev, irq, sunxi_rsb_irq, 0, RSB_CTRL_NAME, rsb);
+	if (ret) {
+		dev_err(dev, "can't register interrupt handler irq %d: %d\n",
+			irq, ret);
+		goto err_reset_assert;
+	}
+
+	/* initialize all devices on the bus into RSB mode */
+	ret = sunxi_rsb_init_device_mode(rsb);
+	if (ret)
+		dev_warn(dev, "Initialize device mode failed: %d\n", ret);
+
+	of_rsb_register_devices(rsb);
+
+	return 0;
+
+err_reset_assert:
+	reset_control_assert(rsb->rstc);
+
+err_clk_disable:
+	clk_disable_unprepare(rsb->clk);
+
+	return ret;
+}
+
+static int sunxi_rsb_remove(struct platform_device *pdev)
+{
+	struct sunxi_rsb *rsb = platform_get_drvdata(pdev);
+
+	device_for_each_child(rsb->dev, NULL, sunxi_rsb_remove_devices);
+	reset_control_assert(rsb->rstc);
+	clk_disable_unprepare(rsb->clk);
+
+	return 0;
+}
+
+static struct platform_driver sunxi_rsb_driver = {
+	.probe = sunxi_rsb_probe,
+	.remove	= sunxi_rsb_remove,
+	.driver	= {
+		.name = RSB_CTRL_NAME,
+		.of_match_table = sunxi_rsb_of_match_table,
+	},
+};
+
+static int __init sunxi_rsb_init(void)
+{
+	int ret = bus_register(&sunxi_rsb_bus);
+
+	if (ret) {
+		pr_err("failed to register sunxi sunxi_rsb bus: %d\n", ret);
+		return ret;
+	}
+	ret = platform_driver_register(&sunxi_rsb_driver);
+	if (ret)	/* don't leave the bus type registered on failure */
+		bus_unregister(&sunxi_rsb_bus);
+	return ret;
+}
+module_init(sunxi_rsb_init);
+static void __exit sunxi_rsb_exit(void)
+{
+	platform_driver_unregister(&sunxi_rsb_driver);
+	bus_unregister(&sunxi_rsb_bus);
+}
+module_exit(sunxi_rsb_exit);
+
+MODULE_AUTHOR("Chen-Yu Tsai <wens@csie.org>");
+MODULE_DESCRIPTION("Allwinner sunXi Reduced Serial Bus controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sunxi-rsb.h b/include/linux/sunxi-rsb.h
new file mode 100644
index 000000000000..7e75bb0346d0
--- /dev/null
+++ b/include/linux/sunxi-rsb.h
@@ -0,0 +1,105 @@
+/*
+ * Allwinner Reduced Serial Bus Driver
+ *
+ * Copyright (c) 2015 Chen-Yu Tsai
+ *
+ * Author: Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#ifndef _SUNXI_RSB_H
+#define _SUNXI_RSB_H
+
+#include <linux/device.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+
+struct sunxi_rsb;
+
+/**
+ * struct sunxi_rsb_device - Basic representation of an RSB device
+ * @dev:	Driver model representation of the device.
+ * @rsb:	RSB controller managing the bus hosting this device.
+ * @rtaddr:	This device's runtime address
+ * @hwaddr:	This device's hardware address
+ */
+struct sunxi_rsb_device {
+	struct device		dev;
+	struct sunxi_rsb	*rsb;
+	int			irq;
+	u8			rtaddr;
+	u16			hwaddr;
+};
+
+static inline struct sunxi_rsb_device *to_sunxi_rsb_device(struct device *d)
+{
+	return container_of(d, struct sunxi_rsb_device, dev);
+}
+
+static inline void *sunxi_rsb_device_get_drvdata(const struct sunxi_rsb_device *rdev)
+{
+	return dev_get_drvdata(&rdev->dev);
+}
+
+static inline void sunxi_rsb_device_set_drvdata(struct sunxi_rsb_device *rdev,
+						void *data)
+{
+	dev_set_drvdata(&rdev->dev, data);
+}
+
+/**
+ * struct sunxi_rsb_driver - RSB slave device driver
+ * @driver:	RSB device drivers should initialize name and owner field of
+ *		this structure.
+ * @probe:	binds this driver to a RSB device.
+ * @remove:	unbinds this driver from the RSB device.
+ */
+struct sunxi_rsb_driver {
+	struct device_driver driver;
+	int (*probe)(struct sunxi_rsb_device *rdev);
+	int (*remove)(struct sunxi_rsb_device *rdev);
+};
+
+static inline struct sunxi_rsb_driver *to_sunxi_rsb_driver(struct device_driver *d)
+{
+	return container_of(d, struct sunxi_rsb_driver, driver);
+}
+
+int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv);
+
+/**
+ * sunxi_rsb_driver_unregister() - unregister an RSB client driver
+ * @rdrv:	the driver to unregister
+ */
+static inline void sunxi_rsb_driver_unregister(struct sunxi_rsb_driver *rdrv)
+{
+	if (rdrv)
+		driver_unregister(&rdrv->driver);
+}
+
+#define module_sunxi_rsb_driver(__sunxi_rsb_driver) \
+	module_driver(__sunxi_rsb_driver, sunxi_rsb_driver_register, \
+			sunxi_rsb_driver_unregister)
+
+struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev,
+					    const struct regmap_config *config,
+					    struct lock_class_key *lock_key,
+					    const char *lock_name);
+
+/**
+ * devm_regmap_init_sunxi_rsb(): Initialise managed register map
+ *
+ * @rdev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_sunxi_rsb(rdev, config)			\
+	__regmap_lockdep_wrapper(__devm_regmap_init_sunxi_rsb, #config,	\
+				 rdev, config)
+
+#endif /* _SUNXI_RSB_H */
-- 
cgit v1.2.3


From 1be5336bc7ba050ee07d352643bf4c01c513553c Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 16 Oct 2015 10:18:10 +0300
Subject: dmaengine: edma: New device tree binding

With the old binding and driver architecture we had many issues:
No way to assign eDMA channels to event queues, thus not able to tune the
system by moving specific DMA channels to low/high priority servicing. We
moved the cyclic channels to high priority within the code, but that was
just a workaround to this issue.
Memcopy was fundamentally broken: even if the driver scanned the DT/devices
in the booted system for direct DMA users (which is not effective when the
events are going through a crossbar) and created a map of 'used' channels,
this information was not really usable. Since via dmaengine API the eDMA
driver will be called with _some_ channel number, we would try to request
this channel when any channel is requested for memcpy. By luck we got
channel which is not used by any device most of the time so things worked,
but if a device would have been using the given channel, but not requested
it, the memcpy channel would have been waiting for HW event.
The old code had the am33xx/am43xx DMA event router handling embedded. This
should have been done in a separate driver since it is not part of the
actual eDMA IP.
There was no way to 'lock' PaRAM slots to be used by the DSP for example
when booting with DT.
In DT boot the edma node used more than one hwmod which is not a good
practice and the kernel prints warning because of this.

With the new bindings and the changes in the driver we can:
- No regression with Legacy binding and non DT boot
- DMA channels can be assigned to any TC (to set priority)
- PaRAM slots can be reserved for other cores to use
- Dynamic power management for CC and TCs, if only TC0 is used all other TC
  can be powered down for example

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 Documentation/devicetree/bindings/dma/ti-edma.txt | 117 +++++-
 drivers/dma/edma.c                                | 486 +++++++++++++++-------
 include/linux/platform_data/edma.h                |   3 +
 3 files changed, 459 insertions(+), 147 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt
index 5ba525a10035..d3d0a4fb1c73 100644
--- a/Documentation/devicetree/bindings/dma/ti-edma.txt
+++ b/Documentation/devicetree/bindings/dma/ti-edma.txt
@@ -1,4 +1,119 @@
-TI EDMA
+Texas Instruments eDMA
+
+The eDMA3 consists of two components: Channel controller (CC) and Transfer
+Controller(s) (TC). The CC is the main entry for DMA users since it is
+responsible for the DMA channel handling, while the TCs are responsible to
+execute the actual DMA transfer.
+
+------------------------------------------------------------------------------
+eDMA3 Channel Controller
+
+Required properties:
+- compatible:	"ti,edma3-tpcc" for the channel controller(s)
+- #dma-cells:	Should be set to <2>. The first number is the DMA request
+		number and the second is the TC the channel is serviced on.
+- reg:		Memory map of eDMA CC
+- reg-names:	"edma3_cc"
+- interrupts:	Interrupt lines for CCINT, MPERR and CCERRINT.
+- interrupt-names: "edma3_ccint", "edma3_mperr" and "edma3_ccerrint"
+- ti,tptcs:	List of TPTCs associated with the eDMA in the following form:
+		<&tptc_phandle TC_priority_number>. The highest priority is 0.
+
+Optional properties:
+- ti,hwmods:	Name of the hwmods associated to the eDMA CC
+- ti,edma-memcpy-channels: List of channels allocated to be used for memcpy, i.e.
+		these channels will be SW triggered channels. The list must
+		contain 16-bit numbers, see example.
+- ti,edma-reserved-slot-ranges: PaRAM slot ranges which should not be used by
+		the driver, they are allocated to be used by for example the
+		DSP. See example.
+
+------------------------------------------------------------------------------
+eDMA3 Transfer Controller
+
+Required properties:
+- compatible:	"ti,edma3-tptc" for the transfer controller(s)
+- reg:		Memory map of eDMA TC
+- interrupts:	Interrupt number for TCerrint.
+
+Optional properties:
+- ti,hwmods:	Name of the hwmods associated to the given eDMA TC
+- interrupt-names: "edma3_tcerrint"
+
+------------------------------------------------------------------------------
+Example:
+
+edma: edma@49000000 {
+	compatible = "ti,edma3-tpcc";
+	ti,hwmods = "tpcc";
+	reg =	<0x49000000 0x10000>;
+	reg-names = "edma3_cc";
+	interrupts = <12 13 14>;
+	interrupt-names = "edma3_ccint", "edma3_mperr", "edma3_ccerrint";
+	dma-requests = <64>;
+	#dma-cells = <2>;
+
+	ti,tptcs = <&edma_tptc0 7>, <&edma_tptc1 7>, <&edma_tptc2 0>;
+
+	/* Channels 20 and 21 are allocated for memcpy */
+	ti,edma-memcpy-channels = /bits/ 16 <20 21>;
+	/* The following PaRAM slots are reserved: 35-44 and 100-109 */
+	ti,edma-reserved-slot-ranges = /bits/ 16 <35 10>,
+				       /bits/ 16 <100 10>;
+};
+
+edma_tptc0: tptc@49800000 {
+	compatible = "ti,edma3-tptc";
+	ti,hwmods = "tptc0";
+	reg =	<0x49800000 0x100000>;
+	interrupts = <112>;
+	interrupt-names = "edma3_tcerrint";
+};
+
+edma_tptc1: tptc@49900000 {
+	compatible = "ti,edma3-tptc";
+	ti,hwmods = "tptc1";
+	reg =	<0x49900000 0x100000>;
+	interrupts = <113>;
+	interrupt-names = "edma3_tcerrint";
+};
+
+edma_tptc2: tptc@49a00000 {
+	compatible = "ti,edma3-tptc";
+	ti,hwmods = "tptc2";
+	reg =	<0x49a00000 0x100000>;
+	interrupts = <114>;
+	interrupt-names = "edma3_tcerrint";
+};
+
+sham: sham@53100000 {
+	compatible = "ti,omap4-sham";
+	ti,hwmods = "sham";
+	reg = <0x53100000 0x200>;
+	interrupts = <109>;
+	/* DMA channel 36 executed on eDMA TC0 - low priority queue */
+	dmas = <&edma 36 0>;
+	dma-names = "rx";
+};
+
+mcasp0: mcasp@48038000 {
+	compatible = "ti,am33xx-mcasp-audio";
+	ti,hwmods = "mcasp0";
+	reg = <0x48038000 0x2000>,
+		<0x46000000 0x400000>;
+	reg-names = "mpu", "dat";
+	interrupts = <80>, <81>;
+	interrupt-names = "tx", "rx";
+	status = "disabled";
+	/* DMA channels 8 and 9 executed on eDMA TC2 - high priority queue */
+	dmas = <&edma 8 2>,
+	       <&edma 9 2>;
+	dma-names = "tx", "rx";
+};
+
+------------------------------------------------------------------------------
+DEPRECATED binding, new DTS files must use the ti,edma3-tpcc/ti,edma3-tptc
+binding.
 
 Required properties:
 - compatible : "ti,edma3"
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index d4d71e60da1b..31722d436a42 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -201,13 +201,20 @@ struct edma_desc {
 
 struct edma_cc;
 
+struct edma_tc {
+	struct device_node		*node;
+	u16				id;
+};
+
 struct edma_chan {
 	struct virt_dma_chan		vchan;
 	struct list_head		node;
 	struct edma_desc		*edesc;
 	struct edma_cc			*ecc;
+	struct edma_tc			*tc;
 	int				ch_num;
 	bool				alloced;
+	bool				hw_triggered;
 	int				slot[EDMA_MAX_SLOTS];
 	int				missed;
 	struct dma_slave_config		cfg;
@@ -218,6 +225,7 @@ struct edma_cc {
 	struct edma_soc_info		*info;
 	void __iomem			*base;
 	int				id;
+	bool				legacy_mode;
 
 	/* eDMA3 resource information */
 	unsigned			num_channels;
@@ -228,20 +236,16 @@ struct edma_cc {
 	bool				chmap_exist;
 	enum dma_event_q		default_queue;
 
-	bool				unused_chan_list_done;
-	/* The slot_inuse bit for each PaRAM slot is clear unless the
-	 * channel is in use ... by ARM or DSP, for QDMA, or whatever.
+	/*
+	 * The slot_inuse bit for each PaRAM slot is clear unless the slot is
+	 * in use by Linux or if it is allocated to be used by DSP.
 	 */
 	unsigned long *slot_inuse;
 
-	/* The channel_unused bit for each channel is clear unless
-	 * it is not being used on this platform. It uses a bit
-	 * of SOC-specific initialization code.
-	 */
-	unsigned long *channel_unused;
-
 	struct dma_device		dma_slave;
+	struct dma_device		*dma_memcpy;
 	struct edma_chan		*slave_chans;
+	struct edma_tc			*tc_list;
 	int				dummy_slot;
 };
 
@@ -251,8 +255,17 @@ static const struct edmacc_param dummy_paramset = {
 	.ccnt = 1,
 };
 
+#define EDMA_BINDING_LEGACY	0
+#define EDMA_BINDING_TPCC	1
 static const struct of_device_id edma_of_ids[] = {
-	{ .compatible = "ti,edma3", },
+	{
+		.compatible = "ti,edma3",
+		.data = (void *)EDMA_BINDING_LEGACY,
+	},
+	{
+		.compatible = "ti,edma3-tpcc",
+		.data = (void *)EDMA_BINDING_TPCC,
+	},
 	{}
 };
 
@@ -412,60 +425,6 @@ static void edma_set_chmap(struct edma_chan *echan, int slot)
 	}
 }
 
-static int prepare_unused_channel_list(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct edma_cc *ecc = data;
-	int dma_req_min = EDMA_CTLR_CHAN(ecc->id, 0);
-	int dma_req_max = dma_req_min + ecc->num_channels;
-	int i, count;
-	struct of_phandle_args  dma_spec;
-
-	if (dev->of_node) {
-		struct platform_device *dma_pdev;
-
-		count = of_property_count_strings(dev->of_node, "dma-names");
-		if (count < 0)
-			return 0;
-		for (i = 0; i < count; i++) {
-			if (of_parse_phandle_with_args(dev->of_node, "dmas",
-						       "#dma-cells", i,
-						       &dma_spec))
-				continue;
-
-			if (!of_match_node(edma_of_ids, dma_spec.np)) {
-				of_node_put(dma_spec.np);
-				continue;
-			}
-
-			dma_pdev = of_find_device_by_node(dma_spec.np);
-			if (&dma_pdev->dev != ecc->dev)
-				continue;
-
-			clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
-				  ecc->channel_unused);
-			of_node_put(dma_spec.np);
-		}
-		return 0;
-	}
-
-	/* For non-OF case */
-	for (i = 0; i < pdev->num_resources; i++) {
-		struct resource	*res = &pdev->resource[i];
-		int dma_req;
-
-		if (!(res->flags & IORESOURCE_DMA))
-			continue;
-
-		dma_req = (int)res->start;
-		if (dma_req >= dma_req_min && dma_req < dma_req_max)
-			clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
-				  ecc->channel_unused);
-	}
-
-	return 0;
-}
-
 static void edma_setup_interrupt(struct edma_chan *echan, bool enable)
 {
 	struct edma_cc *ecc = echan->ecc;
@@ -617,7 +576,7 @@ static void edma_start(struct edma_chan *echan)
 	int j = (channel >> 5);
 	unsigned int mask = BIT(channel & 0x1f);
 
-	if (test_bit(channel, ecc->channel_unused)) {
+	if (!echan->hw_triggered) {
 		/* EDMA channels without event association */
 		dev_dbg(ecc->dev, "ESR%d %08x\n", j,
 			edma_shadow0_read_array(ecc, SH_ESR, j));
@@ -734,20 +693,6 @@ static int edma_alloc_channel(struct edma_chan *echan,
 	struct edma_cc *ecc = echan->ecc;
 	int channel = EDMA_CHAN_SLOT(echan->ch_num);
 
-	if (!ecc->unused_chan_list_done) {
-		/*
-		 * Scan all the platform devices to find out the EDMA channels
-		 * used and clear them in the unused list, making the rest
-		 * available for ARM usage.
-		 */
-		int ret = bus_for_each_dev(&platform_bus_type, NULL, ecc,
-					   prepare_unused_channel_list);
-		if (ret < 0)
-			return ret;
-
-		ecc->unused_chan_list_done = true;
-	}
-
 	/* ensure access through shadow region 0 */
 	edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
 
@@ -899,7 +844,7 @@ static int edma_terminate_all(struct dma_chan *chan)
 	if (echan->edesc) {
 		edma_stop(echan);
 		/* Move the cyclic channel back to default queue */
-		if (echan->edesc->cyclic)
+		if (!echan->tc && echan->edesc->cyclic)
 			edma_assign_channel_eventq(echan, EVENTQ_DEFAULT);
 		/*
 		 * free the running request descriptor
@@ -1403,7 +1348,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	}
 
 	/* Place the cyclic channel to highest priority queue */
-	edma_assign_channel_eventq(echan, EVENTQ_0);
+	if (!echan->tc)
+		edma_assign_channel_eventq(echan, EVENTQ_0);
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
 }
@@ -1609,18 +1555,54 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable)
+{
+	struct platform_device *tc_pdev;
+	int ret;
+
+	if (!tc)
+		return;
+
+	tc_pdev = of_find_device_by_node(tc->node);
+	if (!tc_pdev) {
+		pr_err("%s: TPTC device is not found\n", __func__);
+		return;
+	}
+	if (!pm_runtime_enabled(&tc_pdev->dev))
+		pm_runtime_enable(&tc_pdev->dev);
+
+	if (enable)
+		ret = pm_runtime_get_sync(&tc_pdev->dev);
+	else
+		ret = pm_runtime_put_sync(&tc_pdev->dev);
+
+	if (ret < 0)
+		pr_err("%s: pm_runtime_%s_sync() failed for %s\n", __func__,
+		       enable ? "get" : "put", dev_name(&tc_pdev->dev));
+}
+
 /* Alloc channel resources */
 static int edma_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
-	struct device *dev = chan->device->dev;
+	struct edma_cc *ecc = echan->ecc;
+	struct device *dev = ecc->dev;
+	enum dma_event_q eventq_no = EVENTQ_DEFAULT;
 	int ret;
 
-	ret = edma_alloc_channel(echan, EVENTQ_DEFAULT);
+	if (echan->tc) {
+		eventq_no = echan->tc->id;
+	} else if (ecc->tc_list) {
+		/* memcpy channel */
+		echan->tc = &ecc->tc_list[ecc->info->default_queue];
+		eventq_no = echan->tc->id;
+	}
+
+	ret = edma_alloc_channel(echan, eventq_no);
 	if (ret)
 		return ret;
 
-	echan->slot[0] = edma_alloc_slot(echan->ecc, echan->ch_num);
+	echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num);
 	if (echan->slot[0] < 0) {
 		dev_err(dev, "Entry slot allocation failed for channel %u\n",
 			EDMA_CHAN_SLOT(echan->ch_num));
@@ -1631,8 +1613,11 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
 	edma_set_chmap(echan, echan->slot[0]);
 	echan->alloced = true;
 
-	dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num,
-		EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num));
+	dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n",
+		EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id,
+		echan->hw_triggered ? "HW" : "SW");
+
+	edma_tc_set_pm_state(echan->tc, true);
 
 	return 0;
 
@@ -1645,6 +1630,7 @@ err_slot:
 static void edma_free_chan_resources(struct dma_chan *chan)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = echan->ecc->dev;
 	int i;
 
 	/* Terminate transfers */
@@ -1669,7 +1655,12 @@ static void edma_free_chan_resources(struct dma_chan *chan)
 		echan->alloced = false;
 	}
 
-	dev_dbg(chan->device->dev, "freeing channel for %u\n", echan->ch_num);
+	edma_tc_set_pm_state(echan->tc, false);
+	echan->tc = NULL;
+	echan->hw_triggered = false;
+
+	dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n",
+		EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id);
 }
 
 /* Send pending descriptor to hardware */
@@ -1756,41 +1747,90 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
 	return ret;
 }
 
+static bool edma_is_memcpy_channel(int ch_num, u16 *memcpy_channels)
+{
+	s16 *memcpy_ch = memcpy_channels;
+
+	if (!memcpy_channels)
+		return false;
+	while (*memcpy_ch != -1) {
+		if (*memcpy_ch == ch_num)
+			return true;
+		memcpy_ch++;
+	}
+	return false;
+}
+
 #define EDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
 				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
 				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
 				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
 
-static void edma_dma_init(struct edma_cc *ecc)
+static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 {
-	struct dma_device *ddev = &ecc->dma_slave;
+	struct dma_device *s_ddev = &ecc->dma_slave;
+	struct dma_device *m_ddev = NULL;
+	s16 *memcpy_channels = ecc->info->memcpy_channels;
 	int i, j;
 
-	dma_cap_zero(ddev->cap_mask);
-	dma_cap_set(DMA_SLAVE, ddev->cap_mask);
-	dma_cap_set(DMA_CYCLIC, ddev->cap_mask);
-	dma_cap_set(DMA_MEMCPY, ddev->cap_mask);
+	dma_cap_zero(s_ddev->cap_mask);
+	dma_cap_set(DMA_SLAVE, s_ddev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask);
+	if (ecc->legacy_mode && !memcpy_channels) {
+		dev_warn(ecc->dev,
+			 "Legacy memcpy is enabled, things might not work\n");
 
-	ddev->device_prep_slave_sg = edma_prep_slave_sg;
-	ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic;
-	ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
-	ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
-	ddev->device_free_chan_resources = edma_free_chan_resources;
-	ddev->device_issue_pending = edma_issue_pending;
-	ddev->device_tx_status = edma_tx_status;
-	ddev->device_config = edma_slave_config;
-	ddev->device_pause = edma_dma_pause;
-	ddev->device_resume = edma_dma_resume;
-	ddev->device_terminate_all = edma_terminate_all;
-
-	ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
-	ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
-	ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-
-	ddev->dev = ecc->dev;
+		dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask);
+		s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+		s_ddev->directions = BIT(DMA_MEM_TO_MEM);
+	}
 
-	INIT_LIST_HEAD(&ddev->channels);
+	s_ddev->device_prep_slave_sg = edma_prep_slave_sg;
+	s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic;
+	s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+	s_ddev->device_free_chan_resources = edma_free_chan_resources;
+	s_ddev->device_issue_pending = edma_issue_pending;
+	s_ddev->device_tx_status = edma_tx_status;
+	s_ddev->device_config = edma_slave_config;
+	s_ddev->device_pause = edma_dma_pause;
+	s_ddev->device_resume = edma_dma_resume;
+	s_ddev->device_terminate_all = edma_terminate_all;
+
+	s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+	s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+	s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV));
+	s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	s_ddev->dev = ecc->dev;
+	INIT_LIST_HEAD(&s_ddev->channels);
+
+	if (memcpy_channels) {
+		m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+		ecc->dma_memcpy = m_ddev;
+
+		dma_cap_zero(m_ddev->cap_mask);
+		dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask);
+
+		m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+		m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+		m_ddev->device_free_chan_resources = edma_free_chan_resources;
+		m_ddev->device_issue_pending = edma_issue_pending;
+		m_ddev->device_tx_status = edma_tx_status;
+		m_ddev->device_config = edma_slave_config;
+		m_ddev->device_pause = edma_dma_pause;
+		m_ddev->device_resume = edma_dma_resume;
+		m_ddev->device_terminate_all = edma_terminate_all;
+
+		m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+		m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+		m_ddev->directions = BIT(DMA_MEM_TO_MEM);
+		m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+		m_ddev->dev = ecc->dev;
+		INIT_LIST_HEAD(&m_ddev->channels);
+	} else if (!ecc->legacy_mode) {
+		dev_info(ecc->dev, "memcpy is disabled\n");
+	}
 
 	for (i = 0; i < ecc->num_channels; i++) {
 		struct edma_chan *echan = &ecc->slave_chans[i];
@@ -1798,7 +1838,10 @@ static void edma_dma_init(struct edma_cc *ecc)
 		echan->ecc = ecc;
 		echan->vchan.desc_free = edma_desc_free;
 
-		vchan_init(&echan->vchan, ddev);
+		if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels))
+			vchan_init(&echan->vchan, m_ddev);
+		else
+			vchan_init(&echan->vchan, s_ddev);
 
 		INIT_LIST_HEAD(&echan->node);
 		for (j = 0; j < EDMA_MAX_SLOTS; j++)
@@ -1921,7 +1964,8 @@ static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata,
 	return 0;
 }
 
-static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev)
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+						     bool legacy_mode)
 {
 	struct edma_soc_info *info;
 	struct property *prop;
@@ -1932,20 +1976,121 @@ static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev)
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", &sz);
+	if (legacy_mode) {
+		prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map",
+					&sz);
+		if (prop) {
+			ret = edma_xbar_event_map(dev, info, sz);
+			if (ret)
+				return ERR_PTR(ret);
+		}
+		return info;
+	}
+
+	/* Get the list of channels allocated to be used for memcpy */
+	prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz);
+	if (prop) {
+		const char pname[] = "ti,edma-memcpy-channels";
+		size_t nelm = sz / sizeof(s16);
+		s16 *memcpy_ch;
+
+		memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s16),
+					 GFP_KERNEL);
+		if (!memcpy_ch)
+			return ERR_PTR(-ENOMEM);
+
+		ret = of_property_read_u16_array(dev->of_node, pname,
+						 (u16 *)memcpy_ch, nelm);
+		if (ret)
+			return ERR_PTR(ret);
+
+		memcpy_ch[nelm] = -1;
+		info->memcpy_channels = memcpy_ch;
+	}
+
+	prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges",
+				&sz);
 	if (prop) {
-		ret = edma_xbar_event_map(dev, info, sz);
+		const char pname[] = "ti,edma-reserved-slot-ranges";
+		s16 (*rsv_slots)[2];
+		size_t nelm = sz / sizeof(*rsv_slots);
+		struct edma_rsv_info *rsv_info;
+
+		if (!nelm)
+			return info;
+
+		rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL);
+		if (!rsv_info)
+			return ERR_PTR(-ENOMEM);
+
+		rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots),
+					 GFP_KERNEL);
+		if (!rsv_slots)
+			return ERR_PTR(-ENOMEM);
+
+		ret = of_property_read_u16_array(dev->of_node, pname,
+						 (u16 *)rsv_slots, nelm * 2);
 		if (ret)
 			return ERR_PTR(ret);
+
+		rsv_slots[nelm][0] = -1;
+		rsv_slots[nelm][1] = -1;
+		info->rsv = rsv_info;
+		info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots;
 	}
 
 	return info;
 }
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct edma_cc *ecc = ofdma->of_dma_data;
+	struct dma_chan *chan = NULL;
+	struct edma_chan *echan;
+	int i;
+
+	if (!ecc || dma_spec->args_count < 1)
+		return NULL;
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		echan = &ecc->slave_chans[i];
+		if (echan->ch_num == dma_spec->args[0]) {
+			chan = &echan->vchan.chan;
+			break;
+		}
+	}
+
+	if (!chan)
+		return NULL;
+
+	if (echan->ecc->legacy_mode && dma_spec->args_count == 1)
+		goto out;
+
+	if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 &&
+	    dma_spec->args[1] < echan->ecc->num_tc) {
+		echan->tc = &echan->ecc->tc_list[dma_spec->args[1]];
+		goto out;
+	}
+
+	return NULL;
+out:
+	/* The channel is going to be used as HW synchronized */
+	echan->hw_triggered = true;
+	return dma_get_slave_channel(chan);
+}
 #else
-static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev)
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+						     bool legacy_mode)
 {
 	return ERR_PTR(-EINVAL);
 }
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	return NULL;
+}
 #endif
 
 static int edma_probe(struct platform_device *pdev)
@@ -1953,7 +2098,6 @@ static int edma_probe(struct platform_device *pdev)
 	struct edma_soc_info	*info = pdev->dev.platform_data;
 	s8			(*queue_priority_mapping)[2];
 	int			i, off, ln;
-	const s16		(*rsv_chans)[2];
 	const s16		(*rsv_slots)[2];
 	const s16		(*xbar_chans)[2];
 	int			irq;
@@ -1962,10 +2106,17 @@ static int edma_probe(struct platform_device *pdev)
 	struct device_node	*node = pdev->dev.of_node;
 	struct device		*dev = &pdev->dev;
 	struct edma_cc		*ecc;
+	bool			legacy_mode = true;
 	int ret;
 
 	if (node) {
-		info = edma_setup_info_from_dt(dev);
+		const struct of_device_id *match;
+
+		match = of_match_node(edma_of_ids, node);
+		if (match && (u32)match->data == EDMA_BINDING_TPCC)
+			legacy_mode = false;
+
+		info = edma_setup_info_from_dt(dev, legacy_mode);
 		if (IS_ERR(info)) {
 			dev_err(dev, "failed to get DT data\n");
 			return PTR_ERR(info);
@@ -1994,6 +2145,7 @@ static int edma_probe(struct platform_device *pdev)
 
 	ecc->dev = dev;
 	ecc->id = pdev->id;
+	ecc->legacy_mode = legacy_mode;
 	/* When booting with DT the pdev->id is -1 */
 	if (ecc->id < 0)
 		ecc->id = 0;
@@ -2024,12 +2176,6 @@ static int edma_probe(struct platform_device *pdev)
 	if (!ecc->slave_chans)
 		return -ENOMEM;
 
-	ecc->channel_unused = devm_kcalloc(dev,
-					   BITS_TO_LONGS(ecc->num_channels),
-					   sizeof(unsigned long), GFP_KERNEL);
-	if (!ecc->channel_unused)
-		return -ENOMEM;
-
 	ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots),
 				       sizeof(unsigned long), GFP_KERNEL);
 	if (!ecc->slot_inuse)
@@ -2040,20 +2186,7 @@ static int edma_probe(struct platform_device *pdev)
 	for (i = 0; i < ecc->num_slots; i++)
 		edma_write_slot(ecc, i, &dummy_paramset);
 
-	/* Mark all channels as unused */
-	memset(ecc->channel_unused, 0xff, sizeof(ecc->channel_unused));
-
 	if (info->rsv) {
-		/* Clear the reserved channels in unused list */
-		rsv_chans = info->rsv->rsv_chans;
-		if (rsv_chans) {
-			for (i = 0; rsv_chans[i][0] != -1; i++) {
-				off = rsv_chans[i][0];
-				ln = rsv_chans[i][1];
-				clear_bits(off, ln, ecc->channel_unused);
-			}
-		}
-
 		/* Set the reserved slots in inuse list */
 		rsv_slots = info->rsv->rsv_slots;
 		if (rsv_slots) {
@@ -2070,7 +2203,6 @@ static int edma_probe(struct platform_device *pdev)
 	if (xbar_chans) {
 		for (i = 0; xbar_chans[i][1] != -1; i++) {
 			off = xbar_chans[i][1];
-			clear_bits(off, 1, ecc->channel_unused);
 		}
 	}
 
@@ -2112,6 +2244,31 @@ static int edma_probe(struct platform_device *pdev)
 
 	queue_priority_mapping = info->queue_priority_mapping;
 
+	if (!ecc->legacy_mode) {
+		int lowest_priority = 0;
+		struct of_phandle_args tc_args;
+
+		ecc->tc_list = devm_kcalloc(dev, ecc->num_tc,
+					    sizeof(*ecc->tc_list), GFP_KERNEL);
+		if (!ecc->tc_list)
+			return -ENOMEM;
+
+		for (i = 0;; i++) {
+			ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs",
+							       1, i, &tc_args);
+			if (ret || i == ecc->num_tc)
+				break;
+
+			ecc->tc_list[i].node = tc_args.np;
+			ecc->tc_list[i].id = i;
+			queue_priority_mapping[i][1] = tc_args.args[0];
+			if (queue_priority_mapping[i][1] > lowest_priority) {
+				lowest_priority = queue_priority_mapping[i][1];
+				info->default_queue = i;
+			}
+		}
+	}
+
 	/* Event queue priority mapping */
 	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
 		edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
@@ -2125,7 +2282,7 @@ static int edma_probe(struct platform_device *pdev)
 	ecc->info = info;
 
 	/* Init the dma device and channels */
-	edma_dma_init(ecc);
+	edma_dma_init(ecc, legacy_mode);
 
 	for (i = 0; i < ecc->num_channels; i++) {
 		/* Assign all channels to the default queue */
@@ -2136,12 +2293,23 @@ static int edma_probe(struct platform_device *pdev)
 	}
 
 	ret = dma_async_device_register(&ecc->dma_slave);
-	if (ret)
+	if (ret) {
+		dev_err(dev, "slave ddev registration failed (%d)\n", ret);
 		goto err_reg1;
+	}
+
+	if (ecc->dma_memcpy) {
+		ret = dma_async_device_register(ecc->dma_memcpy);
+		if (ret) {
+			dev_err(dev, "memcpy ddev registration failed (%d)\n",
+				ret);
+			dma_async_device_unregister(&ecc->dma_slave);
+			goto err_reg1;
+		}
+	}
 
 	if (node)
-		of_dma_controller_register(node, of_dma_xlate_by_chan_id,
-					   &ecc->dma_slave);
+		of_dma_controller_register(node, of_edma_xlate, ecc);
 
 	dev_info(dev, "TI EDMA DMA engine driver\n");
 
@@ -2160,12 +2328,30 @@ static int edma_remove(struct platform_device *pdev)
 	if (dev->of_node)
 		of_dma_controller_free(dev->of_node);
 	dma_async_device_unregister(&ecc->dma_slave);
+	if (ecc->dma_memcpy)
+		dma_async_device_unregister(ecc->dma_memcpy);
 	edma_free_slot(ecc, ecc->dummy_slot);
 
 	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int edma_pm_suspend(struct device *dev)
+{
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+	struct edma_chan *echan = ecc->slave_chans;
+	int i;
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		if (echan[i].alloced) {
+			edma_setup_interrupt(&echan[i], false);
+			edma_tc_set_pm_state(echan[i].tc, false);
+		}
+	}
+
+	return 0;
+}
+
 static int edma_pm_resume(struct device *dev)
 {
 	struct edma_cc *ecc = dev_get_drvdata(dev);
@@ -2190,6 +2376,8 @@ static int edma_pm_resume(struct device *dev)
 
 			/* Set up channel -> slot mapping for the entry slot */
 			edma_set_chmap(&echan[i], echan[i].slot[0]);
+
+			edma_tc_set_pm_state(echan[i].tc, true);
 		}
 	}
 
@@ -2198,7 +2386,7 @@ static int edma_pm_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops edma_pm_ops = {
-	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, edma_pm_resume)
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume)
 };
 
 static struct platform_driver edma_driver = {
@@ -2213,12 +2401,18 @@ static struct platform_driver edma_driver = {
 
 bool edma_filter_fn(struct dma_chan *chan, void *param)
 {
+	bool match = false;
+
 	if (chan->device->dev->driver == &edma_driver.driver) {
 		struct edma_chan *echan = to_edma_chan(chan);
 		unsigned ch_req = *(unsigned *)param;
-		return ch_req == echan->ch_num;
+		if (ch_req == echan->ch_num) {
+			/* The channel is going to be used as HW synchronized */
+			echan->hw_triggered = true;
+			match = true;
+		}
 	}
-	return false;
+	return match;
 }
 EXPORT_SYMBOL(edma_filter_fn);
 
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 6b9d500956e4..e2878baeb90e 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -71,6 +71,9 @@ struct edma_soc_info {
 	/* Resource reservation for other cores */
 	struct edma_rsv_info	*rsv;
 
+	/* List of channels allocated for memcpy, terminated with -1 */
+	s16			*memcpy_channels;
+
 	s8	(*queue_priority_mapping)[2];
 	const s16	(*xbar_chans)[2];
 };
-- 
cgit v1.2.3


From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Tue, 13 Oct 2015 19:11:36 +0300
Subject: svcrdma: Port to new memory registration API

Instead of maintaining a fastreg page list, keep an sg table
and convert an array of pages to a sg list. Then call ib_map_mr_sg
and construct ib_reg_wr.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Selvin Xavier <selvin.xavier@avagotech.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  6 +--
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 76 ++++++++++++++++++--------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 +++++---------
 3 files changed, 55 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 7ccc961f33e9..1e4438ea2380 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge {
 };
 struct svc_rdma_fastreg_mr {
 	struct ib_mr *mr;
-	void *kva;
-	struct ib_fast_reg_page_list *page_list;
-	int page_list_len;
+	struct scatterlist *sg;
+	int sg_nents;
 	unsigned long access_flags;
-	unsigned long map_len;
 	enum dma_data_direction direction;
 	struct list_head frmr_list;
 };
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 7be42d0da19e..cb0991345816 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -220,12 +220,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 {
 	struct ib_rdma_wr read_wr;
 	struct ib_send_wr inv_wr;
-	struct ib_fast_reg_wr fastreg_wr;
+	struct ib_reg_wr reg_wr;
 	u8 key;
-	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
 	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
 	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
-	int ret, read, pno;
+	int ret, read, pno, dma_nents, n;
 	u32 pg_off = *page_offset;
 	u32 pg_no = *page_no;
 
@@ -234,16 +234,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 
 	ctxt->direction = DMA_FROM_DEVICE;
 	ctxt->frmr = frmr;
-	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
-	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+	nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
+	read = min_t(int, nents << PAGE_SHIFT, rs_length);
 
-	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
 	frmr->direction = DMA_FROM_DEVICE;
 	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->map_len = pages_needed << PAGE_SHIFT;
-	frmr->page_list_len = pages_needed;
+	frmr->sg_nents = nents;
 
-	for (pno = 0; pno < pages_needed; pno++) {
+	for (pno = 0; pno < nents; pno++) {
 		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
 
 		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -251,17 +249,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 		head->arg.len += len;
 		if (!pg_off)
 			head->count++;
+
+		sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
+			    len, pg_off);
+
 		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
 		rqstp->rq_next_page = rqstp->rq_respages + 1;
-		frmr->page_list->page_list[pno] =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					head->arg.pages[pg_no], 0,
-					PAGE_SIZE, DMA_FROM_DEVICE);
-		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
-					   frmr->page_list->page_list[pno]);
-		if (ret)
-			goto err;
-		atomic_inc(&xprt->sc_dma_used);
 
 		/* adjust offset and wrap to next page if needed */
 		pg_off += len;
@@ -277,28 +270,42 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	else
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
+	dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
+				  frmr->sg, frmr->sg_nents,
+				  frmr->direction);
+	if (!dma_nents) {
+		pr_err("svcrdma: failed to dma map sg %p\n",
+		       frmr->sg);
+		return -ENOMEM;
+	}
+	atomic_inc(&xprt->sc_dma_used);
+
+	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+	if (unlikely(n != frmr->sg_nents)) {
+		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
+		       frmr->mr, n, frmr->sg_nents);
+		return n < 0 ? n : -EINVAL;
+	}
+
 	/* Bump the key */
 	key = (u8)(frmr->mr->lkey & 0x000000FF);
 	ib_update_fast_reg_key(frmr->mr, ++key);
 
-	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+	ctxt->sge[0].addr = frmr->mr->iova;
 	ctxt->sge[0].lkey = frmr->mr->lkey;
-	ctxt->sge[0].length = read;
+	ctxt->sge[0].length = frmr->mr->length;
 	ctxt->count = 1;
 	ctxt->read_hdr = head;
 
-	/* Prepare FASTREG WR */
-	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-	fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.wr.send_flags = IB_SEND_SIGNALED;
-	fastreg_wr.iova_start = (unsigned long)frmr->kva;
-	fastreg_wr.page_list = frmr->page_list;
-	fastreg_wr.page_list_len = frmr->page_list_len;
-	fastreg_wr.page_shift = PAGE_SHIFT;
-	fastreg_wr.length = frmr->map_len;
-	fastreg_wr.access_flags = frmr->access_flags;
-	fastreg_wr.rkey = frmr->mr->lkey;
-	fastreg_wr.wr.next = &read_wr.wr;
+	/* Prepare REG WR */
+	reg_wr.wr.opcode = IB_WR_REG_MR;
+	reg_wr.wr.wr_id = 0;
+	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
+	reg_wr.wr.num_sge = 0;
+	reg_wr.mr = frmr->mr;
+	reg_wr.key = frmr->mr->lkey;
+	reg_wr.access = frmr->access_flags;
+	reg_wr.wr.next = &read_wr.wr;
 
 	/* Prepare RDMA_READ */
 	memset(&read_wr, 0, sizeof(read_wr));
@@ -324,7 +331,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	ctxt->wr_op = read_wr.wr.opcode;
 
 	/* Post the chain */
-	ret = svc_rdma_send(xprt, &fastreg_wr.wr);
+	ret = svc_rdma_send(xprt, &reg_wr.wr);
 	if (ret) {
 		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
 		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -338,7 +345,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	atomic_inc(&rdma_stat_read);
 	return ret;
  err:
-	svc_rdma_unmap_dma(ctxt);
+	ib_dma_unmap_sg(xprt->sc_cm_id->device,
+			frmr->sg, frmr->sg_nents, frmr->direction);
 	svc_rdma_put_context(ctxt, 0);
 	svc_rdma_put_frmr(xprt, frmr);
 	return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4a41122d586f..a266e870d870 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
 {
 	struct ib_mr *mr;
-	struct ib_fast_reg_page_list *pl;
+	struct scatterlist *sg;
 	struct svc_rdma_fastreg_mr *frmr;
 	u32 num_sg;
 
@@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
 	if (IS_ERR(mr))
 		goto err_free_frmr;
 
-	pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
-					 num_sg);
-	if (IS_ERR(pl))
+	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+	if (!sg)
 		goto err_free_mr;
 
+	sg_init_table(sg, RPCSVC_MAXPAGES);
+
 	frmr->mr = mr;
-	frmr->page_list = pl;
+	frmr->sg = sg;
 	INIT_LIST_HEAD(&frmr->frmr_list);
 	return frmr;
 
@@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
 		frmr = list_entry(xprt->sc_frmr_q.next,
 				  struct svc_rdma_fastreg_mr, frmr_list);
 		list_del_init(&frmr->frmr_list);
+		kfree(frmr->sg);
 		ib_dereg_mr(frmr->mr);
-		ib_free_fast_reg_page_list(frmr->page_list);
 		kfree(frmr);
 	}
 }
@@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
 		frmr = list_entry(rdma->sc_frmr_q.next,
 				  struct svc_rdma_fastreg_mr, frmr_list);
 		list_del_init(&frmr->frmr_list);
-		frmr->map_len = 0;
-		frmr->page_list_len = 0;
+		frmr->sg_nents = 0;
 	}
 	spin_unlock_bh(&rdma->sc_frmr_q_lock);
 	if (frmr)
@@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
 	return rdma_alloc_frmr(rdma);
 }
 
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
-			   struct svc_rdma_fastreg_mr *frmr)
-{
-	int page_no;
-	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-		dma_addr_t addr = frmr->page_list->page_list[page_no];
-		if (ib_dma_mapping_error(frmr->mr->device, addr))
-			continue;
-		atomic_dec(&xprt->sc_dma_used);
-		ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
-				  frmr->direction);
-	}
-}
-
 void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
 		       struct svc_rdma_fastreg_mr *frmr)
 {
 	if (frmr) {
-		frmr_unmap_dma(rdma, frmr);
+		ib_dma_unmap_sg(rdma->sc_cm_id->device,
+				frmr->sg, frmr->sg_nents, frmr->direction);
+		atomic_dec(&rdma->sc_dma_used);
 		spin_lock_bh(&rdma->sc_frmr_q_lock);
 		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
 		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-- 
cgit v1.2.3


From a519435a96597d8cd96123246fea4ae5a6c90b02 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 20 Oct 2015 16:34:16 +0200
Subject: dma-buf/fence: add fence_wait_any_timeout function v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Waiting for the first fence in an array of fences to signal.

This is useful for device driver specific resource managers
and also Vulkan needs something similar.

v2: more parameter checks, handling for timeout==0,
    remove NULL entry support, better callback removal.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/dma-buf/fence.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fence.h   |  3 +-
 2 files changed, 100 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 50ef8bd8708b..7b05dbe9b296 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -397,6 +397,104 @@ out:
 }
 EXPORT_SYMBOL(fence_default_wait);
 
+static bool
+fence_test_signaled_any(struct fence **fences, uint32_t count)
+{
+	int i;
+
+	for (i = 0; i < count; ++i) {
+		struct fence *fence = fences[i];
+		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * fence_wait_any_timeout - sleep until any fence gets signaled
+ * or until timeout elapses
+ * @fences:	[in]	array of fences to wait on
+ * @count:	[in]	number of fences to wait on
+ * @intr:	[in]	if true, do an interruptible wait
+ * @timeout:	[in]	timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns -EINVAL on invalid arguments or a custom fence wait implementation,
+ * -ENOMEM if allocation fails, -ERESTARTSYS if interrupted, 0 if the wait
+ * timed out, or the remaining timeout in jiffies on success.
+ *
+ * Synchronously waits for the first fence in the array to be signaled. The
+ * caller needs to hold a reference to all fences in the array, otherwise a
+ * fence might be freed before return, resulting in undefined behavior.
+ */
+signed long
+fence_wait_any_timeout(struct fence **fences, uint32_t count,
+		       bool intr, signed long timeout)
+{
+	struct default_wait_cb *cb;
+	signed long ret = timeout;
+	unsigned i;
+
+	if (WARN_ON(!fences || !count || timeout < 0))
+		return -EINVAL;
+
+	if (timeout == 0) {
+		for (i = 0; i < count; ++i)
+			if (fence_is_signaled(fences[i]))
+				return 1;
+
+		return 0;
+	}
+
+	cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
+	if (cb == NULL) {
+		ret = -ENOMEM;
+		goto err_free_cb;
+	}
+
+	for (i = 0; i < count; ++i) {
+		struct fence *fence = fences[i];
+
+		if (fence->ops->wait != fence_default_wait) {
+			ret = -EINVAL;
+			goto fence_rm_cb;
+		}
+
+		cb[i].task = current;
+		if (fence_add_callback(fence, &cb[i].base,
+				       fence_default_wait_cb)) {
+			/* This fence is already signaled */
+			goto fence_rm_cb;
+		}
+	}
+
+	while (ret > 0) {
+		if (intr)
+			set_current_state(TASK_INTERRUPTIBLE);
+		else
+			set_current_state(TASK_UNINTERRUPTIBLE);
+
+		if (fence_test_signaled_any(fences, count))
+			break;
+
+		ret = schedule_timeout(ret);
+
+		if (ret > 0 && intr && signal_pending(current))
+			ret = -ERESTARTSYS;
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+fence_rm_cb:
+	while (i-- > 0)
+		fence_remove_callback(fences[i], &cb[i].base);
+
+err_free_cb:
+	kfree(cb);
+
+	return ret;
+}
+EXPORT_SYMBOL(fence_wait_any_timeout);
+
 /**
  * fence_init - Initialize a custom fence.
  * @fence:	[in]	the fence to initialize
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 39efee130d2b..a4084d6bb851 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -305,7 +305,8 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2)
 }
 
 signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);
-
+signed long fence_wait_any_timeout(struct fence **fences, uint32_t count,
+				   bool intr, signed long timeout);
 
 /**
  * fence_wait - sleep until the fence gets signaled
-- 
cgit v1.2.3


From 6c455ac17bcf4beae6c094a1007b976b60b4bb57 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 21 Oct 2015 12:58:17 +0200
Subject: dma-buf/fence: add fence_is_later()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return true when fence 1 is later than fence 2 without
checking if any of them are signaled.

Useful for driver specific resource handling based on fences.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/linux/fence.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fence.h b/include/linux/fence.h
index a4084d6bb851..bb522011383b 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -279,6 +279,22 @@ fence_is_signaled(struct fence *fence)
 	return false;
 }
 
+/**
+ * fence_is_later - return if f1 is chronologically later than f2
+ * @f1:	[in]	the first fence from the same context
+ * @f2:	[in]	the second fence from the same context
+ *
+ * Returns true if f1 is chronologically later than f2. Both fences must be
+ * from the same context, since a seqno is not re-used across contexts.
+ */
+static inline bool fence_is_later(struct fence *f1, struct fence *f2)
+{
+	if (WARN_ON(f1->context != f2->context))
+		return false;
+
+	return f1->seqno - f2->seqno < INT_MAX;
+}
+
 /**
  * fence_later - return the chronologically later fence
  * @f1:	[in]	the first fence from the same context
@@ -298,10 +314,10 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2)
 	 * set if enable_signaling wasn't called, and enabling that here is
 	 * overkill.
 	 */
-	if (f2->seqno - f1->seqno <= INT_MAX)
-		return fence_is_signaled(f2) ? NULL : f2;
-	else
+	if (fence_is_later(f1, f2))
 		return fence_is_signaled(f1) ? NULL : f1;
+	else
+		return fence_is_signaled(f2) ? NULL : f2;
 }
 
 signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);
-- 
cgit v1.2.3


From a76caf55e5b356ba20a5a43ac4d9f7a04b20941d Mon Sep 17 00:00:00 2001
From: Ørjan Eide <orjan.eide@arm.com>
Date: Thu, 10 Sep 2015 18:09:30 +0100
Subject: thermal: Add devfreq cooling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a generic thermal cooling device for devfreq, that is similar to
cpu_cooling.

The device must use devfreq.  In order to use the power extension of the
cooling device, it must have registered its OPPs using the OPP library.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Ørjan Eide <orjan.eide@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/Kconfig           |  14 +
 drivers/thermal/Makefile          |   3 +
 drivers/thermal/devfreq_cooling.c | 563 ++++++++++++++++++++++++++++++++++++++
 include/linux/devfreq_cooling.h   |  81 ++++++
 4 files changed, 661 insertions(+)
 create mode 100644 drivers/thermal/devfreq_cooling.c
 create mode 100644 include/linux/devfreq_cooling.h

(limited to 'include/linux')

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 5aabc4bc0d75..90629f69bb22 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -147,6 +147,20 @@ config CLOCK_THERMAL
 	  device that is configured to use this cooling mechanism will be
 	  controlled to reduce clock frequency whenever temperature is high.
 
+config DEVFREQ_THERMAL
+	bool "Generic device cooling support"
+	depends on PM_DEVFREQ
+	depends on PM_OPP
+	help
+	  This implements the generic devfreq cooling mechanism through
+	  frequency reduction for devices using devfreq.
+
+	  This will throttle the device by limiting the maximum allowed DVFS
+	  frequency corresponding to the cooling level.
+
+	  In order to use the power extensions of the cooling device,
+	  devfreq should use the simple_ondemand governor.
+
 	  If you want this support, you should say Y here.
 
 config THERMAL_EMULATION
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 26f160809959..cfae6a654793 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -22,6 +22,9 @@ thermal_sys-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 # clock cooling
 thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
 
+# devfreq cooling
+thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
+
 # platform thermal drivers
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
new file mode 100644
index 000000000000..a032c5d5c374
--- /dev/null
+++ b/drivers/thermal/devfreq_cooling.c
@@ -0,0 +1,563 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ *                  devfreq
+ *
+ * Copyright (C) 2014-2015 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * TODO:
+ *    - If OPPs are added or removed after devfreq cooling has
+ *      registered, the devfreq cooling won't react to it.
+ */
+
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/pm_opp.h>
+#include <linux/thermal.h>
+
+static DEFINE_MUTEX(devfreq_lock);
+static DEFINE_IDR(devfreq_idr);
+
+/**
+ * struct devfreq_cooling_device - Devfreq cooling device
+ * @id:		unique integer value corresponding to each
+ *		devfreq_cooling_device registered.
+ * @cdev:	Pointer to associated thermal cooling device.
+ * @devfreq:	Pointer to associated devfreq device.
+ * @cooling_state:	Current cooling state.
+ * @power_table:	Pointer to table with maximum power draw for each
+ *			cooling state. State is the index into the table, and
+ *			the power is in mW.
+ * @freq_table:	Pointer to a table with the frequencies sorted in descending
+ *		order.  You can index the table by cooling device state
+ * @freq_table_size:	Size of the @freq_table and @power_table
+ * @power_ops:	Pointer to devfreq_cooling_power, used to generate the
+ *		@power_table.
+ */
+struct devfreq_cooling_device {
+	int id;
+	struct thermal_cooling_device *cdev;
+	struct devfreq *devfreq;
+	unsigned long cooling_state;
+	u32 *power_table;
+	u32 *freq_table;
+	size_t freq_table_size;
+	struct devfreq_cooling_power *power_ops;
+};
+
+/**
+ * get_idr - function to get a unique id.
+ * @idr: struct idr * handle used to create an id.
+ * @id: int * value generated by this function.
+ *
+ * This function will populate @id with a unique
+ * id, using the idr API.
+ *
+ * Return: 0 on success, an error code on failure.
+ */
+static int get_idr(struct idr *idr, int *id)
+{
+	int ret;
+
+	mutex_lock(&devfreq_lock);
+	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
+	mutex_unlock(&devfreq_lock);
+	if (unlikely(ret < 0))
+		return ret;
+	*id = ret;
+
+	return 0;
+}
+
+/**
+ * release_idr - function to free the unique id.
+ * @idr: struct idr * handle used for creating the id.
+ * @id: int value representing the unique id.
+ */
+static void release_idr(struct idr *idr, int id)
+{
+	mutex_lock(&devfreq_lock);
+	idr_remove(idr, id);
+	mutex_unlock(&devfreq_lock);
+}
+
+/**
+ * partition_enable_opps() - disable all opps above a given state
+ * @dfc:	Pointer to devfreq we are operating on
+ * @cdev_state:	cooling device state we're setting
+ *
+ * Go through the OPPs of the device, enabling all OPPs until
+ * @cdev_state and disabling those frequencies above it.
+ */
+static int partition_enable_opps(struct devfreq_cooling_device *dfc,
+				 unsigned long cdev_state)
+{
+	int i;
+	struct device *dev = dfc->devfreq->dev.parent;
+
+	for (i = 0; i < dfc->freq_table_size; i++) {
+		struct dev_pm_opp *opp;
+		int ret = 0;
+		unsigned int freq = dfc->freq_table[i];
+		bool want_enable = i >= cdev_state ? true : false;
+
+		rcu_read_lock();
+		opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
+		rcu_read_unlock();
+
+		if (PTR_ERR(opp) == -ERANGE)
+			continue;
+		else if (IS_ERR(opp))
+			return PTR_ERR(opp);
+
+		if (want_enable)
+			ret = dev_pm_opp_enable(dev, freq);
+		else
+			ret = dev_pm_opp_disable(dev, freq);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
+					 unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+
+	*state = dfc->freq_table_size - 1;
+
+	return 0;
+}
+
+static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+					 unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+
+	*state = dfc->cooling_state;
+
+	return 0;
+}
+
+static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+					 unsigned long state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	int ret;
+
+	if (state == dfc->cooling_state)
+		return 0;
+
+	dev_dbg(dev, "Setting cooling state %lu\n", state);
+
+	if (state >= dfc->freq_table_size)
+		return -EINVAL;
+
+	ret = partition_enable_opps(dfc, state);
+	if (ret)
+		return ret;
+
+	dfc->cooling_state = state;
+
+	return 0;
+}
+
+/**
+ * freq_get_state() - get the cooling state corresponding to a frequency
+ * @dfc:	Pointer to devfreq cooling device
+ * @freq:	frequency in Hz
+ *
+ * Return: the cooling state associated with the @freq, or
+ * THERMAL_CSTATE_INVALID if it wasn't found.
+ */
+static unsigned long
+freq_get_state(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+	int i;
+
+	for (i = 0; i < dfc->freq_table_size; i++) {
+		if (dfc->freq_table[i] == freq)
+			return i;
+	}
+
+	return THERMAL_CSTATE_INVALID;
+}
+
+/**
+ * get_static_power() - calculate the static power
+ * @dfc:	Pointer to devfreq cooling device
+ * @freq:	Frequency in Hz
+ *
+ * Calculate the static power in milliwatts using the supplied
+ * get_static_power().  The current voltage is calculated using the
+ * OPP library.  If no get_static_power() was supplied, assume the
+ * static power is negligible.
+ */
+static unsigned long
+get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	unsigned long voltage;
+	struct dev_pm_opp *opp;
+
+	if (!dfc->power_ops->get_static_power)
+		return 0;
+
+	rcu_read_lock();
+
+	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
+	if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE))
+		opp = dev_pm_opp_find_freq_exact(dev, freq, false);
+
+	voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
+
+	rcu_read_unlock();
+
+	if (voltage == 0) {
+		dev_warn_ratelimited(dev,
+				     "Failed to get voltage for frequency %lu: %ld\n",
+				     freq, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+		return 0;
+	}
+
+	return dfc->power_ops->get_static_power(voltage);
+}
+
+/**
+ * get_dynamic_power - calculate the dynamic power
+ * @dfc:	Pointer to devfreq cooling device
+ * @freq:	Frequency in Hz
+ * @voltage:	Voltage in millivolts
+ *
+ * Calculate the dynamic power in milliwatts consumed by the device at
+ * frequency @freq and voltage @voltage.  If the get_dynamic_power()
+ * was supplied as part of the devfreq_cooling_power struct, then that
+ * function is used.  Otherwise, a simple power model (Pdyn = Coeff *
+ * Voltage^2 * Frequency) is used.
+ */
+static unsigned long
+get_dynamic_power(struct devfreq_cooling_device *dfc, unsigned long freq,
+		  unsigned long voltage)
+{
+	unsigned long power;
+	u32 freq_mhz;
+	struct devfreq_cooling_power *dfc_power = dfc->power_ops;
+
+	if (dfc_power->get_dynamic_power)
+		return dfc_power->get_dynamic_power(freq, voltage);
+
+	freq_mhz = freq / 1000000;
+	power = (u64)dfc_power->dyn_power_coeff * freq_mhz * voltage * voltage;
+	do_div(power, 1000000000);
+
+	return power;
+}
+
+static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
+					       struct thermal_zone_device *tz,
+					       u32 *power)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct devfreq_dev_status *status = &df->last_status;
+	unsigned long state;
+	unsigned long freq = status->current_frequency;
+	u32 dyn_power, static_power;
+
+	/* Get dynamic power for state */
+	state = freq_get_state(dfc, freq);
+	if (state == THERMAL_CSTATE_INVALID)
+		return -EAGAIN;
+
+	dyn_power = dfc->power_table[state];
+
+	/* Scale dynamic power for utilization */
+	dyn_power = (dyn_power * status->busy_time) / status->total_time;
+
+	/* Get static power */
+	static_power = get_static_power(dfc, freq);
+
+	*power = dyn_power + static_power;
+
+	return 0;
+}
+
+static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
+				       struct thermal_zone_device *tz,
+				       unsigned long state,
+				       u32 *power)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	unsigned long freq;
+	u32 static_power;
+
+	if (state < 0 || state >= dfc->freq_table_size)
+		return -EINVAL;
+
+	freq = dfc->freq_table[state];
+	static_power = get_static_power(dfc, freq);
+
+	*power = dfc->power_table[state] + static_power;
+	return 0;
+}
+
+static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
+				       struct thermal_zone_device *tz,
+				       u32 power, unsigned long *state)
+{
+	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct devfreq *df = dfc->devfreq;
+	struct devfreq_dev_status *status = &df->last_status;
+	unsigned long freq = status->current_frequency;
+	unsigned long busy_time;
+	s32 dyn_power;
+	u32 static_power;
+	int i;
+
+	static_power = get_static_power(dfc, freq);
+
+	dyn_power = power - static_power;
+	dyn_power = dyn_power > 0 ? dyn_power : 0;
+
+	/* Scale dynamic power for utilization */
+	busy_time = status->busy_time ?: 1;
+	dyn_power = (dyn_power * status->total_time) / busy_time;
+
+	/*
+	 * Find the first cooling state that is within the power
+	 * budget for dynamic power.
+	 */
+	for (i = 0; i < dfc->freq_table_size - 1; i++)
+		if (dyn_power >= dfc->power_table[i])
+			break;
+
+	*state = i;
+	return 0;
+}
+
+static struct thermal_cooling_device_ops devfreq_cooling_ops = {
+	.get_max_state = devfreq_cooling_get_max_state,
+	.get_cur_state = devfreq_cooling_get_cur_state,
+	.set_cur_state = devfreq_cooling_set_cur_state,
+};
+
+/**
+ * devfreq_cooling_gen_tables() - Generate power and freq tables.
+ * @dfc: Pointer to devfreq cooling device.
+ *
+ * Generate power and frequency tables: the power table holds the
+ * device's dynamic power at each cooling state (OPP), computed by
+ * get_dynamic_power() from the frequency and voltage of that OPP.
+ * Static power is not stored in the table; it is added separately
+ * where a total is needed (see devfreq_cooling_state2power()).
+ *
+ * The frequency table holds the frequencies in descending order.
+ * That way it's indexed by cooling device state.
+ *
+ * The tables are malloced, and pointers put in dfc.  They must be
+ * freed when unregistering the devfreq cooling device.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int devfreq_cooling_gen_tables(struct devfreq_cooling_device *dfc)
+{
+	struct devfreq *df = dfc->devfreq;
+	struct device *dev = df->dev.parent;
+	int ret, num_opps;
+	unsigned long freq;
+	u32 *power_table = NULL;
+	u32 *freq_table;
+	int i;
+
+	num_opps = dev_pm_opp_get_opp_count(dev);
+
+	if (dfc->power_ops) {
+		power_table = kcalloc(num_opps, sizeof(*power_table),
+				      GFP_KERNEL);
+		if (!power_table)
+			ret = -ENOMEM;
+	}
+
+	freq_table = kcalloc(num_opps, sizeof(*freq_table),
+			     GFP_KERNEL);
+	if (!freq_table) {
+		ret = -ENOMEM;
+		goto free_power_table;
+	}
+
+	for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
+		unsigned long power_dyn, voltage;
+		struct dev_pm_opp *opp;
+
+		rcu_read_lock();
+
+		opp = dev_pm_opp_find_freq_floor(dev, &freq);
+		if (IS_ERR(opp)) {
+			rcu_read_unlock();
+			ret = PTR_ERR(opp);
+			goto free_tables;
+		}
+
+		voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */
+
+		rcu_read_unlock();
+
+		if (dfc->power_ops) {
+			power_dyn = get_dynamic_power(dfc, freq, voltage);
+
+			dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
+				freq / 1000000, voltage, power_dyn, power_dyn);
+
+			power_table[i] = power_dyn;
+		}
+
+		freq_table[i] = freq;
+	}
+
+	if (dfc->power_ops)
+		dfc->power_table = power_table;
+
+	dfc->freq_table = freq_table;
+	dfc->freq_table_size = num_opps;
+
+	return 0;
+
+free_tables:
+	kfree(freq_table);
+free_power_table:
+	kfree(power_table);
+
+	return ret;
+}
+
+/**
+ * of_devfreq_cooling_register_power() - Register devfreq cooling device,
+ *                                      with OF and power information.
+ * @np:	Pointer to OF device_node.
+ * @df:	Pointer to devfreq device.
+ * @dfc_power:	Pointer to devfreq_cooling_power.
+ *
+ * Register a devfreq cooling device.  The available OPPs must be
+ * registered on the device.
+ *
+ * If @dfc_power is provided, the cooling device is registered with the
+ * power extensions.  For the power extensions to work correctly,
+ * devfreq should use the simple_ondemand governor; other governors
+ * are not currently supported.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power)
+{
+	struct thermal_cooling_device *cdev;
+	struct devfreq_cooling_device *dfc;
+	char dev_name[THERMAL_NAME_LENGTH];
+	int err;
+
+	dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
+	if (!dfc)
+		return ERR_PTR(-ENOMEM);
+
+	dfc->devfreq = df;
+
+	if (dfc_power) {
+		dfc->power_ops = dfc_power;
+
+		devfreq_cooling_ops.get_requested_power =
+			devfreq_cooling_get_requested_power;
+		devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
+		devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
+	}
+
+	err = devfreq_cooling_gen_tables(dfc);
+	if (err)
+		goto free_dfc;
+
+	err = get_idr(&devfreq_idr, &dfc->id);
+	if (err)
+		goto free_tables;
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
+
+	cdev = thermal_of_cooling_device_register(np, dev_name, dfc,
+						  &devfreq_cooling_ops);
+	if (IS_ERR(cdev)) {
+		err = PTR_ERR(cdev);
+		dev_err(df->dev.parent,
+			"Failed to register devfreq cooling device (%d)\n",
+			err);
+		goto release_idr;
+	}
+
+	dfc->cdev = cdev;
+
+	return dfc;
+
+release_idr:
+	release_idr(&devfreq_idr, dfc->id);
+free_tables:
+	kfree(dfc->power_table);
+	kfree(dfc->freq_table);
+free_dfc:
+	kfree(dfc);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
+
+/**
+ * of_devfreq_cooling_register() - Register devfreq cooling device,
+ *                                with OF information.
+ * @np: Pointer to OF device_node.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+	return of_devfreq_cooling_register_power(np, df, NULL);
+}
+EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_register() - Register devfreq cooling device.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df)
+{
+	return of_devfreq_cooling_register(NULL, df);
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_unregister() - Unregister devfreq cooling device.
+ * @dfc: Pointer to devfreq cooling device to unregister.
+ */
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+	if (!dfc)
+		return;
+
+	thermal_cooling_device_unregister(dfc->cdev);
+	release_idr(&devfreq_idr, dfc->id);
+	kfree(dfc->power_table);
+	kfree(dfc->freq_table);
+
+	kfree(dfc);
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
new file mode 100644
index 000000000000..ee5f0ec9290b
--- /dev/null
+++ b/include/linux/devfreq_cooling.h
@@ -0,0 +1,81 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ *                  devfreq
+ *
+ * Copyright (C) 2014-2015 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DEVFREQ_COOLING_H__
+#define __DEVFREQ_COOLING_H__
+
+#include <linux/devfreq.h>
+#include <linux/thermal.h>
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+/**
+ * struct devfreq_cooling_power - Devfreq cooling power ops
+ * @get_static_power:	Take voltage, in mV, and return the static power
+ *			in mW.  If NULL, the static power is assumed
+ *			to be 0.
+ * @get_dynamic_power:	Take voltage, in mV, and frequency, in HZ, and
+ *			return the dynamic power draw in mW.  If NULL,
+ *			a simple power model is used.
+ * @dyn_power_coeff:	Coefficient for the simple dynamic power model in
+ *			mW/(MHz mV mV).
+ *			If get_dynamic_power() is NULL, then the
+ *			dynamic power is calculated as
+ *			@dyn_power_coeff * frequency * voltage^2
+ */
+struct devfreq_cooling_power {
+	unsigned long (*get_static_power)(unsigned long voltage);
+	unsigned long (*get_dynamic_power)(unsigned long freq,
+					   unsigned long voltage);
+	unsigned long dyn_power_coeff;
+};
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power);
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df);
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc);
+
+#else /* !CONFIG_DEVFREQ_THERMAL */
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct devfreq_cooling_device *
+devfreq_cooling_register(struct devfreq *df)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline void
+devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+}
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* __DEVFREQ_COOLING_H__ */
-- 
cgit v1.2.3


From df5c7386f62d2db95ca48005087195e9a15e2b1f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 13 Oct 2015 20:09:19 +0300
Subject: dmaengine: dw: some Intel devices have no memcpy support

Provide a flag to indicate whether the device supports memory-to-memory
transfers. This is not the case for at least the iDMA32 controller, which
might be supported in the future. Besides that, Intel BayTrail and Braswell
users should not try this feature due to HW-specific behaviour.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/dw/core.c                | 6 +++++-
 include/linux/platform_data/dma-dw.h | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index f16d1ed99ba9..41e9554b884d 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1541,6 +1541,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
 
 		/* Fill platform data with the default values */
 		pdata->is_private = true;
+		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
 	} else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
@@ -1653,10 +1654,13 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
 	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
 
-	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	/* Set capabilities */
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
 	if (pdata->is_private)
 		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	if (pdata->is_memcpy)
+		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
 	dw->dma.dev = chip->dev;
 	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
 	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 87ac14c584f2..03b6095d3b18 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -37,6 +37,7 @@ struct dw_dma_slave {
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
+ * @is_memcpy: The device channels do support memory-to-memory transfers.
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -47,6 +48,7 @@ struct dw_dma_slave {
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
 	bool		is_private;
+	bool		is_memcpy;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
-- 
cgit v1.2.3


From 42e5c3e2725ba0c0affc1fc8a6aa1d5cf31ecb75 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sat, 24 Oct 2015 17:27:35 -0400
Subject: SUNRPC: Abstract backchannel operations

xprt_{setup,destroy}_backchannel() won't be adequate for RPC/RDMA
bi-direction. In particular, receive buffers have to be pre-
registered and posted in order to receive incoming backchannel
requests.

Add a virtual function call to allow the insertion of appropriate
backchannel setup and destruction methods for each transport.

In addition, freeing a backchannel request is a little different
for RPC/RDMA. Introduce an rpc_xprt_op to handle the difference.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Tested-By: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/bc_xprt.h |  5 +++++
 include/linux/sunrpc/xprt.h    |  5 +++++
 net/sunrpc/backchannel_rqst.c  | 24 ++++++++++++++++++++++--
 net/sunrpc/xprtsock.c          |  5 +++++
 4 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 8df43c9f11dc..4397a4824c81 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
 
+/* Socket backchannel transport methods */
+int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs);
+void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs);
+void xprt_free_bc_rqst(struct rpc_rqst *req);
+
 /*
  * Determine if a shared backchannel is in use
  */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 0fb9acbb4780..3f79c4a4ce74 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -136,6 +136,11 @@ struct rpc_xprt_ops {
 	int		(*enable_swap)(struct rpc_xprt *xprt);
 	void		(*disable_swap)(struct rpc_xprt *xprt);
 	void		(*inject_disconnect)(struct rpc_xprt *xprt);
+	int		(*bc_setup)(struct rpc_xprt *xprt,
+				    unsigned int min_reqs);
+	void		(*bc_free_rqst)(struct rpc_rqst *rqst);
+	void		(*bc_destroy)(struct rpc_xprt *xprt,
+				      unsigned int max_reqs);
 };
 
 /*
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 6255d141133b..229956bf8457 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -137,6 +137,14 @@ out_free:
  * callback requests can be up to 4096 bytes in size.
  */
 int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
+{
+	if (!xprt->ops->bc_setup)
+		return 0;
+	return xprt->ops->bc_setup(xprt, min_reqs);
+}
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
+
+int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
 {
 	struct rpc_rqst *req;
 	struct list_head tmp_list;
@@ -192,7 +200,6 @@ out_free:
 	dprintk("RPC:       setup backchannel transport failed\n");
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
 
 /**
  * xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
@@ -204,6 +211,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
  * of reqs specified by the caller.
  */
 void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
+{
+	if (xprt->ops->bc_destroy)
+		xprt->ops->bc_destroy(xprt, max_reqs);
+}
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
+
+void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
 {
 	struct rpc_rqst *req = NULL, *tmp = NULL;
 
@@ -227,7 +241,6 @@ out:
 	dprintk("RPC:        backchannel list empty= %s\n",
 		list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
-EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
 
 static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
 {
@@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 
+	xprt->ops->bc_free_rqst(req);
+}
+
+void xprt_free_bc_rqst(struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+
 	dprintk("RPC:       free backchannel req=%p\n", req);
 
 	req->rq_connect_cookie = xprt->connect_cookie - 1;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1a85e0ed0b48..44a81e4c6783 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2580,6 +2580,11 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.enable_swap		= xs_enable_swap,
 	.disable_swap		= xs_disable_swap,
 	.inject_disconnect	= xs_inject_disconnect,
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+	.bc_setup		= xprt_setup_bc,
+	.bc_free_rqst		= xprt_free_bc_rqst,
+	.bc_destroy		= xprt_destroy_bc,
+#endif
 };
 
 /*
-- 
cgit v1.2.3


From 9468431962616c2449d47c482208a5967e011bf9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sat, 24 Oct 2015 17:28:16 -0400
Subject: svcrdma: Add backward direction service for RPC/RDMA transport

On NFSv4.1 mount points, the Linux NFS client uses this transport
endpoint to receive backward direction calls and route replies back
to the NFSv4.1 server.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Tested-By: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/svc_rdma.h          |  6 +++-
 include/linux/sunrpc/xprt.h              |  1 +
 net/sunrpc/xprtrdma/svc_rdma.c           |  6 ++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 58 ++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 7ccc961f33e9..fb4013edcf57 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -228,9 +228,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
 			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
-extern struct svc_xprt_class svc_rdma_class;
 extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
 
+extern struct svc_xprt_class svc_rdma_class;
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+extern struct svc_xprt_class svc_rdma_bc_class;
+#endif
+
 /* svc_rdma.c */
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3f79c4a4ce74..82c083946ef0 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -158,6 +158,7 @@ enum xprt_transports {
 	XPRT_TRANSPORT_TCP	= IPPROTO_TCP,
 	XPRT_TRANSPORT_BC_TCP	= IPPROTO_TCP | XPRT_TRANSPORT_BC,
 	XPRT_TRANSPORT_RDMA	= 256,
+	XPRT_TRANSPORT_BC_RDMA	= XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC,
 	XPRT_TRANSPORT_LOCAL	= 257,
 };
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 2cd252f023a5..1b7051bdbdc8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -239,6 +239,9 @@ void svc_rdma_cleanup(void)
 		unregister_sysctl_table(svcrdma_table_header);
 		svcrdma_table_header = NULL;
 	}
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+	svc_unreg_xprt_class(&svc_rdma_bc_class);
+#endif
 	svc_unreg_xprt_class(&svc_rdma_class);
 	kmem_cache_destroy(svc_rdma_map_cachep);
 	kmem_cache_destroy(svc_rdma_ctxt_cachep);
@@ -286,6 +289,9 @@ int svc_rdma_init(void)
 
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+	svc_reg_xprt_class(&svc_rdma_bc_class);
+#endif
 	return 0;
  err1:
 	kmem_cache_destroy(svc_rdma_map_cachep);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..a133b1e5b5f6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -56,6 +56,7 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 					struct net *net,
 					struct sockaddr *sa, int salen,
@@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
+					   struct sockaddr *, int, int);
+static void svc_rdma_bc_detach(struct svc_xprt *);
+static void svc_rdma_bc_free(struct svc_xprt *);
+
+static struct svc_xprt_ops svc_rdma_bc_ops = {
+	.xpo_create = svc_rdma_bc_create,
+	.xpo_detach = svc_rdma_bc_detach,
+	.xpo_free = svc_rdma_bc_free,
+	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
+	.xpo_secure_port = svc_rdma_secure_port,
+};
+
+struct svc_xprt_class svc_rdma_bc_class = {
+	.xcl_name = "rdma-bc",
+	.xcl_owner = THIS_MODULE,
+	.xcl_ops = &svc_rdma_bc_ops,
+	.xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
+};
+
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
+					   struct net *net,
+					   struct sockaddr *sa, int salen,
+					   int flags)
+{
+	struct svcxprt_rdma *cma_xprt;
+	struct svc_xprt *xprt;
+
+	cma_xprt = rdma_create_xprt(serv, 0);
+	if (!cma_xprt)
+		return ERR_PTR(-ENOMEM);
+	xprt = &cma_xprt->sc_xprt;
+
+	svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
+	serv->sv_bc_xprt = xprt;
+
+	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+	return xprt;
+}
+
+static void svc_rdma_bc_detach(struct svc_xprt *xprt)
+{
+	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+}
+
+static void svc_rdma_bc_free(struct svc_xprt *xprt)
+{
+	struct svcxprt_rdma *rdma =
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+	if (xprt)
+		kfree(rdma);
+}
+#endif	/* CONFIG_SUNRPC_BACKCHANNEL */
+
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt;
-- 
cgit v1.2.3


From 3c99c2cef75eb5bfc05c5728e4560f3ee656d47e Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Mon, 2 Nov 2015 19:03:03 +0000
Subject: thermal: devfreq_cooling: use a thermal_cooling_device for register
 and unregister

Be consistent with what other cooling devices do and return a struct
thermal_cooling_device * on register.  Also, for the unregister, accept
a struct thermal_cooling_device * as parameter.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/devfreq_cooling.c | 16 ++++++++++------
 include/linux/devfreq_cooling.h   | 16 ++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index a27206815066..31e40a9a9fd0 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -467,7 +467,7 @@ free_power_table:
  * devfreq should use the simple_ondemand governor, other governors
  * are not currently supported.
  */
-struct devfreq_cooling_device *
+struct thermal_cooling_device *
 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 				  struct devfreq_cooling_power *dfc_power)
 {
@@ -513,7 +513,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 
 	dfc->cdev = cdev;
 
-	return dfc;
+	return cdev;
 
 release_idr:
 	release_idr(&devfreq_idr, dfc->id);
@@ -533,7 +533,7 @@ EXPORT_SYMBOL_GPL(of_devfreq_cooling_register_power);
  * @np: Pointer to OF device_node.
  * @df: Pointer to devfreq device.
  */
-struct devfreq_cooling_device *
+struct thermal_cooling_device *
 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
 {
 	return of_devfreq_cooling_register_power(np, df, NULL);
@@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(of_devfreq_cooling_register);
  * devfreq_cooling_register() - Register devfreq cooling device.
  * @df: Pointer to devfreq device.
  */
-struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df)
+struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 {
 	return of_devfreq_cooling_register(NULL, df);
 }
@@ -554,11 +554,15 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_register);
  * devfreq_cooling_unregister() - Unregister devfreq cooling device.
  * @dfc: Pointer to devfreq cooling device to unregister.
  */
-void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-	if (!dfc)
+	struct devfreq_cooling_device *dfc;
+
+	if (!cdev)
 		return;
 
+	dfc = cdev->devdata;
+
 	thermal_cooling_device_unregister(dfc->cdev);
 	release_idr(&devfreq_idr, dfc->id);
 	kfree(dfc->power_table);
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index ee5f0ec9290b..7adf6cc4b305 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -43,37 +43,37 @@ struct devfreq_cooling_power {
 	unsigned long dyn_power_coeff;
 };
 
-struct devfreq_cooling_device *
+struct thermal_cooling_device *
 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 				  struct devfreq_cooling_power *dfc_power);
-struct devfreq_cooling_device *
+struct thermal_cooling_device *
 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
-struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df);
-void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc);
+struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
+void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
 
 #else /* !CONFIG_DEVFREQ_THERMAL */
 
-struct devfreq_cooling_device *
+static inline struct thermal_cooling_device *
 of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
 				  struct devfreq_cooling_power *dfc_power)
 {
 	return ERR_PTR(-EINVAL);
 }
 }
 
-static inline struct devfreq_cooling_device *
+static inline struct thermal_cooling_device *
 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct devfreq_cooling_device *
+static inline struct thermal_cooling_device *
 devfreq_cooling_register(struct devfreq *df)
 {
 	return ERR_PTR(-EINVAL);
 }
 
 static inline void
-devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
 {
 }
 
-- 
cgit v1.2.3


From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sat, 24 Oct 2015 17:28:32 -0400
Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports

Forechannel transports get their own "bc_up" method to create an
endpoint for the backchannel service.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Anna Schumaker: Add forward declaration of struct net to xprt.h]
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback.c                 | 40 ++++++++-------------------------------
 include/linux/sunrpc/xprt.h       |  3 +++
 net/sunrpc/xprtrdma/backchannel.c | 21 ++++++++++++++++++++
 net/sunrpc/xprtrdma/transport.c   |  1 +
 net/sunrpc/xprtrdma/xprt_rdma.h   |  1 +
 net/sunrpc/xprtsock.c             | 12 ++++++++++++
 6 files changed, 46 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 75f7c0a7538a..a7f2e6e33305 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv)
 }
 
 #if defined(CONFIG_NFS_V4_1)
-static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
-{
-	/*
-	 * Create an svc_sock for the back channel service that shares the
-	 * fore channel connection.
-	 * Returns the input port (0) and sets the svc_serv bc_xprt on success
-	 */
-	return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
-			      SVC_SOCK_ANONYMOUS);
-}
-
 /*
  * The callback service for NFSv4.1 callbacks
  */
@@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
 		xprt->bc_serv = serv;
 }
 #else
-static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
-{
-	return 0;
-}
-
 static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
 		struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
 {
@@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
 	svc_shutdown_net(serv, net);
 }
 
-static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net)
+static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+			       struct net *net, struct rpc_xprt *xprt)
 {
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 	int ret;
@@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n
 		goto err_bind;
 	}
 
-	switch (minorversion) {
-		case 0:
-			ret = nfs4_callback_up_net(serv, net);
-			break;
-		case 1:
-		case 2:
-			ret = nfs41_callback_up_net(serv, net);
-			break;
-		default:
-			printk(KERN_ERR "NFS: unknown callback version: %d\n",
-					minorversion);
-			ret = -EINVAL;
-			break;
-	}
+	ret = -EPROTONOSUPPORT;
+	if (minorversion == 0)
+		ret = nfs4_callback_up_net(serv, net);
+	else if (xprt->ops->bc_up)
+		ret = xprt->ops->bc_up(serv, net);
 
 	if (ret < 0) {
 		printk(KERN_ERR "NFS: callback service start failed\n");
@@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 		goto err_create;
 	}
 
-	ret = nfs_callback_up_net(minorversion, serv, net);
+	ret = nfs_callback_up_net(minorversion, serv, net, xprt);
 	if (ret < 0)
 		goto err_net;
 
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 82c083946ef0..69ef5b3ab038 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -54,6 +54,8 @@ enum rpc_display_format_t {
 struct rpc_task;
 struct rpc_xprt;
 struct seq_file;
+struct svc_serv;
+struct net;
 
 /*
  * This describes a complete RPC request
@@ -138,6 +140,7 @@ struct rpc_xprt_ops {
 	void		(*inject_disconnect)(struct rpc_xprt *xprt);
 	int		(*bc_setup)(struct rpc_xprt *xprt,
 				    unsigned int min_reqs);
+	int		(*bc_up)(struct svc_serv *serv, struct net *net);
 	void		(*bc_free_rqst)(struct rpc_rqst *rqst);
 	void		(*bc_destroy)(struct rpc_xprt *xprt,
 				      unsigned int max_reqs);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 0b3387fe3f0d..2dcb44f69e53 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 #include "xprt_rdma.h"
 
@@ -173,6 +174,26 @@ out_err:
 	return -ENOMEM;
 }
 
+/**
+ * xprt_rdma_bc_up - Create transport endpoint for backchannel service
+ * @serv: server endpoint
+ * @net: network namespace
+ *
+ * The "xprt" is an implied argument: it supplies the name of the
+ * backchannel transport class.
+ *
+ * Returns zero on success, negative errno on failure
+ */
+int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
+{
+	int ret;
+
+	ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
 /**
  * rpcrdma_bc_marshal_reply - Send backwards direction reply
  * @rqst: buffer containing RPC reply data
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 845278e63be0..8c545f7d7525 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -708,6 +708,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
 	.inject_disconnect	= xprt_rdma_inject_disconnect,
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	.bc_setup		= xprt_rdma_bc_setup,
+	.bc_up			= xprt_rdma_bc_up,
 	.bc_free_rqst		= xprt_rdma_bc_free_rqst,
 	.bc_destroy		= xprt_rdma_bc_destroy,
 #endif
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index eb87d96e80ca..f8dd17be9f43 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -520,6 +520,7 @@ void xprt_rdma_cleanup(void);
  */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
+int xprt_rdma_bc_up(struct svc_serv *, struct net *);
 int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
 void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
 int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 44a81e4c6783..dc4706711224 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1306,6 +1306,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 		xs_tcp_read_reply(xprt, desc) :
 		xs_tcp_read_callback(xprt, desc);
 }
+
+static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
+{
+	int ret;
+
+	ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
+			      SVC_SOCK_ANONYMOUS);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
 #else
 static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 					struct xdr_skb_reader *desc)
@@ -2582,6 +2593,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.inject_disconnect	= xs_inject_disconnect,
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 	.bc_setup		= xprt_setup_bc,
+	.bc_up			= xs_tcp_bc_up,
 	.bc_free_rqst		= xprt_free_bc_rqst,
 	.bc_destroy		= xprt_destroy_bc,
 #endif
-- 
cgit v1.2.3


From 79dbd1baa651cece408e68a1b445f3628c4b5bdc Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 26 Oct 2015 22:23:56 +0100
Subject: libceph: msg signing callouts don't need con argument

We can use msg->con instead - at the point we sign an outgoing message
or check the signature on the incoming one, msg->con is always set.  We
wouldn't know how to sign a message without an associated session (i.e.
msg->con == NULL) and being able to sign a message using an explicitly
provided authorizer is of no use.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c           | 14 ++++++++------
 include/linux/ceph/messenger.h |  5 ++---
 net/ceph/messenger.c           |  4 ++--
 net/ceph/osd_client.c          | 14 ++++++++------
 4 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89838a226fe9..e7b130a637f9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3942,17 +3942,19 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
 	return msg;
 }
 
-static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+static int mds_sign_message(struct ceph_msg *msg)
 {
-       struct ceph_mds_session *s = con->private;
+       struct ceph_mds_session *s = msg->con->private;
        struct ceph_auth_handshake *auth = &s->s_auth;
+
        return ceph_auth_sign_message(auth, msg);
 }
 
-static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+static int mds_check_message_signature(struct ceph_msg *msg)
 {
-       struct ceph_mds_session *s = con->private;
+       struct ceph_mds_session *s = msg->con->private;
        struct ceph_auth_handshake *auth = &s->s_auth;
+
        return ceph_auth_check_message_signature(auth, msg);
 }
 
@@ -3965,8 +3967,8 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.invalidate_authorizer = invalidate_authorizer,
 	.peer_reset = peer_reset,
 	.alloc_msg = mds_alloc_msg,
-	.sign_message = sign_message,
-	.check_message_signature = check_message_signature,
+	.sign_message = mds_sign_message,
+	.check_message_signature = mds_check_message_signature,
 };
 
 /* eof */
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index b2371d9b51fa..3687ff0f0133 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -43,10 +43,9 @@ struct ceph_connection_operations {
 	struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
 					struct ceph_msg_header *hdr,
 					int *skip);
-	int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg);
 
-	int (*check_message_signature) (struct ceph_connection *con,
-					struct ceph_msg *msg);
+	int (*sign_message) (struct ceph_msg *msg);
+	int (*check_message_signature) (struct ceph_msg *msg);
 };
 
 /* use format string %s%d */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index fce6ad636613..805f6f82139f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1205,7 +1205,7 @@ static void prepare_write_message_footer(struct ceph_connection *con)
 	con->out_kvec[v].iov_base = &m->footer;
 	if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
 		if (con->ops->sign_message)
-			con->ops->sign_message(con, m);
+			con->ops->sign_message(m);
 		else
 			m->footer.sig = 0;
 		con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -2422,7 +2422,7 @@ static int read_partial_message(struct ceph_connection *con)
 	}
 
 	if (need_sign && con->ops->check_message_signature &&
-	    con->ops->check_message_signature(con, m)) {
+	    con->ops->check_message_signature(m)) {
 		pr_err("read_partial_message %p signature check failed\n", m);
 		return -EBADMSG;
 	}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 191bc21cecea..118e4ce37ecc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2979,17 +2979,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
-static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_sign_message(struct ceph_msg *msg)
 {
-	struct ceph_osd *o = con->private;
+	struct ceph_osd *o = msg->con->private;
 	struct ceph_auth_handshake *auth = &o->o_auth;
+
 	return ceph_auth_sign_message(auth, msg);
 }
 
-static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+static int osd_check_message_signature(struct ceph_msg *msg)
 {
-	struct ceph_osd *o = con->private;
+	struct ceph_osd *o = msg->con->private;
 	struct ceph_auth_handshake *auth = &o->o_auth;
+
 	return ceph_auth_check_message_signature(auth, msg);
 }
 
@@ -3001,7 +3003,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
-	.sign_message = sign_message,
-	.check_message_signature = check_message_signature,
+	.sign_message = osd_sign_message,
+	.check_message_signature = osd_check_message_signature,
 	.fault = osd_reset,
 };
-- 
cgit v1.2.3


From 859bff51dc5e92ddfb5eb6f17b8040d9311095bb Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 28 Oct 2015 23:50:58 +0100
Subject: libceph: stop duplicating client fields in messenger

supported_features and required_features serve no purpose at all, while
nocrc and tcp_nodelay belong to ceph_options::flags.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h   |  1 +
 include/linux/ceph/messenger.h | 11 +----------
 net/ceph/ceph_common.c         |  6 +-----
 net/ceph/messenger.c           | 26 +++++++++-----------------
 4 files changed, 12 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 397c5cd09794..a7caafe03d3c 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -137,6 +137,7 @@ struct ceph_client {
 #endif
 };
 
+#define from_msgr(ms)	container_of(ms, struct ceph_client, msgr)
 
 
 /*
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 3687ff0f0133..71b1d6cdcb5d 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -57,8 +57,6 @@ struct ceph_messenger {
 
 	atomic_t stopping;
 	possible_net_t net;
-	bool nocrc;
-	bool tcp_nodelay;
 
 	/*
 	 * the global_seq counts connections i (attempt to) initiate
@@ -66,9 +64,6 @@ struct ceph_messenger {
 	 */
 	u32 global_seq;
 	spinlock_t global_seq_lock;
-
-	u64 supported_features;
-	u64 required_features;
 };
 
 enum ceph_msg_data_type {
@@ -267,11 +262,7 @@ extern void ceph_msgr_exit(void);
 extern void ceph_msgr_flush(void);
 
 extern void ceph_messenger_init(struct ceph_messenger *msgr,
-			struct ceph_entity_addr *myaddr,
-			u64 supported_features,
-			u64 required_features,
-			bool nocrc,
-			bool tcp_nodelay);
+				struct ceph_entity_addr *myaddr);
 extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 54a00d66509e..d1494d1a8592 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -596,11 +596,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
 	if (ceph_test_opt(client, MYIP))
 		myaddr = &client->options->my_addr;
 
-	ceph_messenger_init(&client->msgr, myaddr,
-		client->supported_features,
-		client->required_features,
-		ceph_test_opt(client, NOCRC),
-		ceph_test_opt(client, TCP_NODELAY));
+	ceph_messenger_init(&client->msgr, myaddr);
 
 	/* subsystems */
 	err = ceph_monc_init(&client->monc, client);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 805f6f82139f..11108076bac3 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 		return ret;
 	}
 
-	if (con->msgr->tcp_nodelay) {
+	if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
 		int optval = 1;
 
 		ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -1432,7 +1432,8 @@ static int prepare_write_connect(struct ceph_connection *con)
 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
 	     con->connect_seq, global_seq, proto);
 
-	con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
+	con->out_connect.features =
+	    cpu_to_le64(from_msgr(con->msgr)->supported_features);
 	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
 	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
 	con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1527,7 +1528,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 {
 	struct ceph_msg *msg = con->out_msg;
 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
-	bool do_datacrc = !con->msgr->nocrc;
+	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
 	u32 crc;
 
 	dout("%s %p msg %p\n", __func__, con, msg);
@@ -2005,8 +2006,8 @@ static int process_banner(struct ceph_connection *con)
 
 static int process_connect(struct ceph_connection *con)
 {
-	u64 sup_feat = con->msgr->supported_features;
-	u64 req_feat = con->msgr->required_features;
+	u64 sup_feat = from_msgr(con->msgr)->supported_features;
+	u64 req_feat = from_msgr(con->msgr)->required_features;
 	u64 server_feat = ceph_sanitize_features(
 				le64_to_cpu(con->in_reply.features));
 	int ret;
@@ -2232,7 +2233,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
 {
 	struct ceph_msg *msg = con->in_msg;
 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
-	const bool do_datacrc = !con->msgr->nocrc;
+	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
 	struct page *page;
 	size_t page_offset;
 	size_t length;
@@ -2277,7 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
 	int end;
 	int ret;
 	unsigned int front_len, middle_len, data_len;
-	bool do_datacrc = !con->msgr->nocrc;
+	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
 	bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
 	u64 seq;
 	u32 crc;
@@ -2951,15 +2952,8 @@ static void con_fault(struct ceph_connection *con)
  * initialize a new messenger instance
  */
 void ceph_messenger_init(struct ceph_messenger *msgr,
-			struct ceph_entity_addr *myaddr,
-			u64 supported_features,
-			u64 required_features,
-			bool nocrc,
-			bool tcp_nodelay)
+			 struct ceph_entity_addr *myaddr)
 {
-	msgr->supported_features = supported_features;
-	msgr->required_features = required_features;
-
 	spin_lock_init(&msgr->global_seq_lock);
 
 	if (myaddr)
@@ -2969,8 +2963,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
 	msgr->inst.addr.type = 0;
 	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
 	encode_my_addr(msgr);
-	msgr->nocrc = nocrc;
-	msgr->tcp_nodelay = tcp_nodelay;
 
 	atomic_set(&msgr->stopping, 0);
 	write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
-- 
cgit v1.2.3


From a51983e4dd2d4d63912aab939f657c4cd476e21a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 28 Oct 2015 23:52:06 +0100
Subject: libceph: add nocephx_sign_messages option

Support for message signing was merged into 3.19, along with
nocephx_require_signatures option.  But, all that option does is allow
the kernel client to talk to clusters that don't support MSG_AUTH
feature bit.  That's pretty useless, given that it's been supported
since bobtail.

Meanwhile, if one disables message signing on the server side with
"cephx sign messages = false", it becomes impossible to use the kernel
client since it expects messages to be signed if MSG_AUTH was
negotiated.  Add nocephx_sign_messages option to support this use case.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h |  3 ++-
 net/ceph/auth_x.c            |  7 +++++++
 net/ceph/ceph_common.c       | 12 ++++++++++++
 net/ceph/messenger.c         |  2 +-
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index a7caafe03d3c..3e3799cdc6e6 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -29,8 +29,9 @@
 #define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
 #define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
 #define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
-#define CEPH_OPT_NOMSGAUTH	  (1<<4) /* not require cephx message signature */
+#define CEPH_OPT_NOMSGAUTH	  (1<<4) /* don't require msg signing feat */
 #define CEPH_OPT_TCP_NODELAY	  (1<<5) /* TCP_NODELAY on TCP sockets */
+#define CEPH_OPT_NOMSGSIGN	  (1<<6) /* don't sign msgs */
 
 #define CEPH_OPT_DEFAULT   (CEPH_OPT_TCP_NODELAY)
 
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 3a544ca6b5ce..10d87753ed87 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
 
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/libceph.h>
 #include <linux/ceph/messenger.h>
 
 #include "crypto.h"
@@ -698,6 +699,9 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
 {
 	int ret;
 
+	if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
+		return 0;
+
 	ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
 			      msg, &msg->footer.sig);
 	if (ret < 0)
@@ -712,6 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
 	__le64 sig_check;
 	int ret;
 
+	if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
+		return 0;
+
 	ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
 			      msg, &sig_check);
 	if (ret < 0)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index d1494d1a8592..6b4d3a1684de 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -245,6 +245,8 @@ enum {
 	Opt_nocrc,
 	Opt_cephx_require_signatures,
 	Opt_nocephx_require_signatures,
+	Opt_cephx_sign_messages,
+	Opt_nocephx_sign_messages,
 	Opt_tcp_nodelay,
 	Opt_notcp_nodelay,
 };
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
 	{Opt_nocrc, "nocrc"},
 	{Opt_cephx_require_signatures, "cephx_require_signatures"},
 	{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
+	{Opt_cephx_sign_messages, "cephx_sign_messages"},
+	{Opt_nocephx_sign_messages, "nocephx_sign_messages"},
 	{Opt_tcp_nodelay, "tcp_nodelay"},
 	{Opt_notcp_nodelay, "notcp_nodelay"},
 	{-1, NULL}
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
 		case Opt_nocephx_require_signatures:
 			opt->flags |= CEPH_OPT_NOMSGAUTH;
 			break;
+		case Opt_cephx_sign_messages:
+			opt->flags &= ~CEPH_OPT_NOMSGSIGN;
+			break;
+		case Opt_nocephx_sign_messages:
+			opt->flags |= CEPH_OPT_NOMSGSIGN;
+			break;
 
 		case Opt_tcp_nodelay:
 			opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
 		seq_puts(m, "nocrc,");
 	if (opt->flags & CEPH_OPT_NOMSGAUTH)
 		seq_puts(m, "nocephx_require_signatures,");
+	if (opt->flags & CEPH_OPT_NOMSGSIGN)
+		seq_puts(m, "nocephx_sign_messages,");
 	if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
 		seq_puts(m, "notcp_nodelay,");
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 11108076bac3..0cc5608b2c8f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2677,7 +2677,7 @@ more:
 		if (ret <= 0) {
 			switch (ret) {
 			case -EBADMSG:
-				con->error_msg = "bad crc";
+				con->error_msg = "bad crc/signature";
 				/* fall through */
 			case -EBADE:
 				ret = -EIO;
-- 
cgit v1.2.3


From 1e935949111e77b2b1b6fa550e88ff0573c2f4c7 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 29 Sep 2015 01:27:24 -0700
Subject: watchdog: Always evaluate new timeout against min_timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up to now, a new timeout value is only evaluated against min_timeout
if max_timeout is provided. This does not really make sense; a driver
can have a minimum timeout even if it does not have a maximum timeout.
Ensure that it is not smaller than min_timeout, even if max_timeout
is not set.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index d74a0e907b9e..e90e3ea5ebeb 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -119,8 +119,15 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway
 /* Use the following function to check if a timeout value is invalid */
 static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t)
 {
-	return ((wdd->max_timeout != 0) &&
-		(t < wdd->min_timeout || t > wdd->max_timeout));
+	/*
+	 * The timeout is invalid if
+	 * - the requested value is smaller than the configured minimum timeout,
+	 * or
+	 * - a maximum timeout is configured, and the requested value is larger
+	 *   than the maximum timeout.
+	 */
+	return t < wdd->min_timeout ||
+		(wdd->max_timeout && t > wdd->max_timeout);
 }
 
 /* Use the following functions to manipulate watchdog driver specific data */
-- 
cgit v1.2.3


From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 3 Nov 2015 18:25:34 +0100
Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments

The arguments passed around for getacl and setacl xdr encoding, struct
nfs_setaclargs and struct nfs_getaclargs, both contain an array of
pages, an offset into the first page, and the length of the page data.
The offset is unused as it is always zero; remove it.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 5 ++---
 fs/nfs/nfs4xdr.c        | 4 ++--
 include/linux/nfs_xdr.h | 2 --
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87a081c6299d..7ed8f2cd97f8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4603,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
 #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
 
 static int buf_to_pages_noslab(const void *buf, size_t buflen,
-		struct page **pages, unsigned int *pgbase)
+		struct page **pages)
 {
 	struct page *newpage, **spages;
 	int rc = 0;
@@ -4747,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		goto out_free;
 
 	args.acl_len = npages * PAGE_SIZE;
-	args.acl_pgbase = 0;
 
 	dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
 		__func__, buf, buflen, npages, args.acl_len);
@@ -4839,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	if (npages > ARRAY_SIZE(pages))
 		return -ERANGE;
-	i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+	i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
 	if (i < 0)
 		return i;
 	nfs4_inode_return_delegation(inode);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f656791a338..22a1ddd4fe96 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(arg->acl_len);
-	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
+	xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
 }
 
 static void
@@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
-		args->acl_pages, args->acl_pgbase, args->acl_len);
+		args->acl_pages, 0, args->acl_len);
 
 	encode_nops(&hdr);
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4728e7e5fc49..570d630f98ae 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -705,7 +705,6 @@ struct nfs_setaclargs {
 	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
-	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
 };
 
@@ -717,7 +716,6 @@ struct nfs_getaclargs {
 	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
-	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
 };
 
-- 
cgit v1.2.3


From 80220fa72b917c64675f3ba4008d2c5a7b50b281 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 3 Nov 2015 09:00:15 +0100
Subject: watchdog: include: fix some typos

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index e90e3ea5ebeb..5f18dd9ec224 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -25,7 +25,7 @@ struct watchdog_device;
  * @ping:	The routine that sends a keepalive ping to the watchdog device.
  * @status:	The routine that shows the status of the watchdog device.
  * @set_timeout:The routine for setting the watchdog devices timeout value.
- * @get_timeleft:The routine that get's the time that's left before a reset.
+ * @get_timeleft:The routine that gets the time left before a reset.
  * @ref:	The ref operation for dyn. allocated watchdog_device structs
  * @unref:	The unref operation for dyn. allocated watchdog_device structs
  * @ioctl:	The routines that handles extra ioctl calls.
@@ -33,7 +33,7 @@ struct watchdog_device;
  * The watchdog_ops structure contains a list of low-level operations
  * that control a watchdog device. It also contains the module that owns
  * these operations. The start and stop function are mandatory, all other
- * functions are optonal.
+ * functions are optional.
  */
 struct watchdog_ops {
 	struct module *owner;
-- 
cgit v1.2.3


From 760d280084f8805e5de73e3591912d5db9da9dbe Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 3 Nov 2015 09:00:16 +0100
Subject: watchdog: include: add units for timeout values in kerneldoc

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 5f18dd9ec224..027b1f43f12d 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -24,8 +24,8 @@ struct watchdog_device;
  * @stop:	The routine for stopping the watchdog device.
  * @ping:	The routine that sends a keepalive ping to the watchdog device.
  * @status:	The routine that shows the status of the watchdog device.
- * @set_timeout:The routine for setting the watchdog devices timeout value.
- * @get_timeleft:The routine that gets the time left before a reset.
+ * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds).
+ * @get_timeleft:The routine that gets the time left before a reset (in seconds).
  * @ref:	The ref operation for dyn. allocated watchdog_device structs
  * @unref:	The unref operation for dyn. allocated watchdog_device structs
  * @ioctl:	The routines that handles extra ioctl calls.
@@ -59,9 +59,9 @@ struct watchdog_ops {
  * @info:	Pointer to a watchdog_info structure.
  * @ops:	Pointer to the list of watchdog operations.
  * @bootstatus:	Status of the watchdog device at boot.
- * @timeout:	The watchdog devices timeout value.
- * @min_timeout:The watchdog devices minimum timeout value.
- * @max_timeout:The watchdog devices maximum timeout value.
+ * @timeout:	The watchdog devices timeout value (in seconds).
+ * @min_timeout:The watchdog devices minimum timeout value (in seconds).
+ * @max_timeout:The watchdog devices maximum timeout value (in seconds).
  * @driver-data:Pointer to the drivers private data.
  * @lock:	Lock for watchdog core internal use only.
  * @status:	Field that contains the devices internal status bits.
-- 
cgit v1.2.3


From 033291eccbdb1b70ffc02641edae19ac825dc75d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 15 Oct 2015 15:08:48 -0600
Subject: vfio: Include No-IOMMU mode

There is really no way to safely give a user full access to a DMA
capable device without an IOMMU to protect the host system.  There is
also no way to provide DMA translation, for use cases such as device
assignment to virtual machines.  However, there are still those users
that want userspace drivers even under those conditions.  The UIO
driver exists for this use case, but does not provide the degree of
device access and programming that VFIO has.  In an effort to avoid
code duplication, this introduces a No-IOMMU mode for VFIO.

This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling
the "enable_unsafe_noiommu_mode" option on the vfio driver.  This
should make it very clear that this mode is not safe.  Additionally,
CAP_SYS_RAWIO privileges are necessary to work with groups and
containers using this mode.  Groups making use of this support are
named /dev/vfio/noiommu-$GROUP and can only make use of the special
VFIO_NOIOMMU_IOMMU for the container.  Use of this mode, specifically
binding a device without a native IOMMU group to a VFIO bus driver
will taint the kernel and should therefore not be considered
supported.  This patch includes no-iommu support for the vfio-pci bus
driver only.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vfio/Kconfig        |  15 ++++
 drivers/vfio/pci/vfio_pci.c |   8 +-
 drivers/vfio/vfio.c         | 186 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/vfio.h        |   3 +
 include/uapi/linux/vfio.h   |   7 ++
 5 files changed, 209 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 454017928ed0..b6d3cdc2791b 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,5 +31,20 @@ menuconfig VFIO
 
 	  If you don't know what to do here, say N.
 
+menuconfig VFIO_NOIOMMU
+	bool "VFIO No-IOMMU support"
+	depends on VFIO
+	help
+	  VFIO is built on the ability to isolate devices using the IOMMU.
+	  Only with an IOMMU can userspace access to DMA capable devices be
+	  considered secure.  VFIO No-IOMMU mode enables IOMMU groups for
+	  devices without IOMMU backing for the purpose of re-using the VFIO
+	  infrastructure in a non-secure mode.  Use of this mode will result
+	  in an unsupportable kernel and will therefore taint the kernel.
+	  Device assignment to virtual machines is also not possible with
+	  this mode since there is no IOMMU to provide DMA translation.
+
+	  If you don't know what to do here, say N.
+
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 964ad572aaee..32b88bd2c82c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
 		return -EINVAL;
 
-	group = iommu_group_get(&pdev->dev);
+	group = vfio_iommu_group_get(&pdev->dev);
 	if (!group)
 		return -EINVAL;
 
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 	if (!vdev) {
-		iommu_group_put(group);
+		vfio_iommu_group_put(group, &pdev->dev);
 		return -ENOMEM;
 	}
 
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
 	if (ret) {
-		iommu_group_put(group);
+		vfio_iommu_group_put(group, &pdev->dev);
 		kfree(vdev);
 		return ret;
 	}
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 	if (!vdev)
 		return;
 
-	iommu_group_put(pdev->dev.iommu_group);
+	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
 	kfree(vdev);
 
 	if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index ab056f7af54d..de632da2e22f 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,6 +62,7 @@ struct vfio_container {
 	struct rw_semaphore		group_lock;
 	struct vfio_iommu_driver	*iommu_driver;
 	void				*iommu_data;
+	bool				noiommu;
 };
 
 struct vfio_unbound_dev {
@@ -84,6 +85,7 @@ struct vfio_group {
 	struct list_head		unbound_list;
 	struct mutex			unbound_lock;
 	atomic_t			opened;
+	bool				noiommu;
 };
 
 struct vfio_device {
@@ -95,6 +97,147 @@ struct vfio_device {
 	void				*device_data;
 };
 
+#ifdef CONFIG_VFIO_NOIOMMU
+static bool noiommu __read_mostly;
+module_param_named(enable_unsafe_noiommu_mode,
+		   noiommu, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
+#endif
+
+/*
+ * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
+ * and remove functions, any use cases other than acquiring the first
+ * reference for the purpose of calling vfio_add_group_dev() or removing
+ * that symmetric reference after vfio_del_group_dev() should use the raw
+ * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
+ * removes the device from the dummy group and cannot be nested.
+ */
+struct iommu_group *vfio_iommu_group_get(struct device *dev)
+{
+	struct iommu_group *group;
+	int __maybe_unused ret;
+
+	group = iommu_group_get(dev);
+
+#ifdef CONFIG_VFIO_NOIOMMU
+	/*
+	 * With noiommu enabled, an IOMMU group will be created for a device
+	 * that doesn't already have one and doesn't have an iommu_ops on their
+	 * bus.  We use iommu_present() again in the main code to detect these
+	 * fake groups.
+	 */
+	if (group || !noiommu || iommu_present(dev->bus))
+		return group;
+
+	group = iommu_group_alloc();
+	if (IS_ERR(group))
+		return NULL;
+
+	iommu_group_set_name(group, "vfio-noiommu");
+	ret = iommu_group_add_device(group, dev);
+	iommu_group_put(group);
+	if (ret)
+		return NULL;
+
+	/*
+	 * Where to taint?  At this point we've added an IOMMU group for a
+	 * device that is not backed by iommu_ops, therefore any iommu_
+	 * callback using iommu_ops can legitimately Oops.  So, while we may
+	 * be about to give a DMA capable device to a user without IOMMU
+	 * protection, which is clearly taint-worthy, let's go ahead and do
+	 * it here.
+	 */
+	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
+#endif
+
+	return group;
+}
+EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
+
+void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
+{
+#ifdef CONFIG_VFIO_NOIOMMU
+	if (!iommu_present(dev->bus))
+		iommu_group_remove_device(dev);
+#endif
+
+	iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
+
+#ifdef CONFIG_VFIO_NOIOMMU
+static void *vfio_noiommu_open(unsigned long arg)
+{
+	if (arg != VFIO_NOIOMMU_IOMMU)
+		return ERR_PTR(-EINVAL);
+	if (!capable(CAP_SYS_RAWIO))
+		return ERR_PTR(-EPERM);
+
+	return NULL;
+}
+
+static void vfio_noiommu_release(void *iommu_data)
+{
+}
+
+static long vfio_noiommu_ioctl(void *iommu_data,
+			       unsigned int cmd, unsigned long arg)
+{
+	if (cmd == VFIO_CHECK_EXTENSION)
+		return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0;
+
+	return -ENOTTY;
+}
+
+static int vfio_iommu_present(struct device *dev, void *unused)
+{
+	return iommu_present(dev->bus) ? 1 : 0;
+}
+
+static int vfio_noiommu_attach_group(void *iommu_data,
+				     struct iommu_group *iommu_group)
+{
+	return iommu_group_for_each_dev(iommu_group, NULL,
+					vfio_iommu_present) ? -EINVAL : 0;
+}
+
+static void vfio_noiommu_detach_group(void *iommu_data,
+				      struct iommu_group *iommu_group)
+{
+}
+
+static struct vfio_iommu_driver_ops vfio_noiommu_ops = {
+	.name = "vfio-noiommu",
+	.owner = THIS_MODULE,
+	.open = vfio_noiommu_open,
+	.release = vfio_noiommu_release,
+	.ioctl = vfio_noiommu_ioctl,
+	.attach_group = vfio_noiommu_attach_group,
+	.detach_group = vfio_noiommu_detach_group,
+};
+
+static struct vfio_iommu_driver vfio_noiommu_driver = {
+	.ops = &vfio_noiommu_ops,
+};
+
+/*
+ * Wrap IOMMU drivers, the noiommu driver is the one and only driver for
+ * noiommu groups (and thus containers) and not available for normal groups.
+ */
+#define vfio_for_each_iommu_driver(con, pos)				\
+	for (pos = con->noiommu ? &vfio_noiommu_driver :		\
+	     list_first_entry(&vfio.iommu_drivers_list,			\
+			      struct vfio_iommu_driver, vfio_next);	\
+	     (con->noiommu ? pos != NULL :				\
+			&pos->vfio_next != &vfio.iommu_drivers_list);	\
+	      pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next))
+#else
+#define vfio_for_each_iommu_driver(con, pos)				\
+	list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next)
+#endif
+
+
 /**
  * IOMMU driver registration
  */
@@ -199,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
+					    bool noiommu)
 {
 	struct vfio_group *group, *tmp;
 	struct device *dev;
@@ -217,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 	atomic_set(&group->container_users, 0);
 	atomic_set(&group->opened, 0);
 	group->iommu_group = iommu_group;
+	group->noiommu = noiommu;
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -252,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 
 	dev = device_create(vfio.class, NULL,
 			    MKDEV(MAJOR(vfio.group_devt), minor),
-			    group, "%d", iommu_group_id(iommu_group));
+			    group, "%s%d", noiommu ? "noiommu-" : "",
+			    iommu_group_id(iommu_group));
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
 		vfio_group_unlock_and_free(group);
@@ -640,7 +786,8 @@ int vfio_add_group_dev(struct device *dev,
 
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group) {
-		group = vfio_create_group(iommu_group);
+		group = vfio_create_group(iommu_group,
+					  !iommu_present(dev->bus));
 		if (IS_ERR(group)) {
 			iommu_group_put(iommu_group);
 			return PTR_ERR(group);
@@ -852,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
 		 */
 		if (!driver) {
 			mutex_lock(&vfio.iommu_drivers_lock);
-			list_for_each_entry(driver, &vfio.iommu_drivers_list,
-					    vfio_next) {
+			vfio_for_each_iommu_driver(container, driver) {
 				if (!try_module_get(driver->ops->owner))
 					continue;
 
@@ -922,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 	}
 
 	mutex_lock(&vfio.iommu_drivers_lock);
-	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+	vfio_for_each_iommu_driver(container, driver) {
 		void *data;
 
 		if (!try_module_get(driver->ops->owner))
@@ -1187,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	if (atomic_read(&group->container_users))
 		return -EINVAL;
 
+	if (group->noiommu && !capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
 	f = fdget(container_fd);
 	if (!f.file)
 		return -EBADF;
@@ -1202,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 
 	down_write(&container->group_lock);
 
+	/* Real groups and fake groups cannot mix */
+	if (!list_empty(&container->group_list) &&
+	    container->noiommu != group->noiommu) {
+		ret = -EPERM;
+		goto unlock_out;
+	}
+
 	driver = container->iommu_driver;
 	if (driver) {
 		ret = driver->ops->attach_group(container->iommu_data,
@@ -1211,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	}
 
 	group->container = container;
+	container->noiommu = group->noiommu;
 	list_add(&group->container_next, &container->group_list);
 
 	/* Get a reference on the container and mark a user within the group */
@@ -1241,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	    !group->container->iommu_driver || !vfio_group_viable(group))
 		return -EINVAL;
 
+	if (group->noiommu && !capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
 	device = vfio_device_get_from_name(group, buf);
 	if (!device)
 		return -ENODEV;
@@ -1283,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 
 	fd_install(ret, filep);
 
+	if (group->noiommu)
+		dev_warn(device->dev, "vfio-noiommu device opened by user "
+			 "(%s:%d)\n", current->comm, task_pid_nr(current));
+
 	return ret;
 }
 
@@ -1371,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 	if (!group)
 		return -ENODEV;
 
+	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
+		vfio_group_put(group);
+		return -EPERM;
+	}
+
 	/* Do we need multiple instances of the group open?  Seems not. */
 	opened = atomic_cmpxchg(&group->opened, 0, 1);
 	if (opened) {
@@ -1533,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
 	if (!atomic_inc_not_zero(&group->container_users))
 		return ERR_PTR(-EINVAL);
 
+	if (group->noiommu) {
+		atomic_dec(&group->container_users);
+		return ERR_PTR(-EPERM);
+	}
+
 	if (!group->container->iommu_driver ||
 			!vfio_group_viable(group)) {
 		atomic_dec(&group->container_users);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ddb440975382..610a86a892b8 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,9 @@ struct vfio_device_ops {
 	void	(*request)(void *device_data, unsigned int count);
 };
 
+extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
+extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
+
 extern int vfio_add_group_dev(struct device *dev,
 			      const struct vfio_device_ops *ops,
 			      void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9fd7b5d8df2f..751b69f858c8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -38,6 +38,13 @@
 
 #define VFIO_SPAPR_TCE_v2_IOMMU		7
 
+/*
+ * The No-IOMMU IOMMU offers no translation or isolation for devices and
+ * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
+ * code will taint the host kernel and should be used with extreme caution.
+ */
+#define VFIO_NOIOMMU_IOMMU		8
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
-- 
cgit v1.2.3


From e02328f47bd75fde9decf9657ec7d769b370f857 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 8 Sep 2015 14:17:47 +0200
Subject: vga_switcheroo: Drop client power state VGA_SWITCHEROO_INIT

hda_intel.c:azx_probe() defers initialization of an audio controller
on the discrete GPU if the GPU is powered off. The power state of the
GPU is determined by calling vga_switcheroo_get_client_state().

vga_switcheroo_get_client_state() returns VGA_SWITCHEROO_INIT if
vga_switcheroo is not enabled, i.e. if no second GPU or no handler
has registered.

This can go wrong in the following scenario:
- Driver for the integrated GPU is not loaded.
- Driver for the discrete GPU registers with vga_switcheroo, uses driver
  power control to power down the GPU, handler cuts power to the GPU.
- Driver for the audio controller gets loaded after the GPU was powered
  down, calls vga_switcheroo_get_client_state() which returns
  VGA_SWITCHEROO_INIT instead of VGA_SWITCHEROO_OFF.
- Consequence: azx_probe() tries to initialize the audio controller even
  though the GPU is powered down.

The power state VGA_SWITCHEROO_INIT was introduced by c8e9cf7bb240
("vga_switcheroo: Add a helper function to get the client state").
It is not apparent what its benefit might be. The idea seems to
be to initialize the audio controller even if the power state is
VGA_SWITCHEROO_OFF (were vga_switcheroo enabled), but as shown
above this can fail.

Drop VGA_SWITCHEROO_INIT to solve this.

Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/vga/vga_switcheroo.c | 2 --
 include/linux/vga_switcheroo.h   | 5 -----
 2 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 56bbbd65ae8a..41edd5a3f100 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -356,8 +356,6 @@ enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *pdev)
 	client = find_client_from_pci(&vgasr_priv.clients, pdev);
 	if (!client)
 		ret = VGA_SWITCHEROO_NOT_FOUND;
-	else if (!vgasr_priv.active)
-		ret = VGA_SWITCHEROO_INIT;
 	else
 		ret = client->pwr_state;
 	mutex_unlock(&vgasr_mutex);
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 786bc931dbd1..69e1d4a1f1b3 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -39,10 +39,6 @@ struct pci_dev;
  * enum vga_switcheroo_state - client power state
  * @VGA_SWITCHEROO_OFF: off
  * @VGA_SWITCHEROO_ON: on
- * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but
- * 	vga_switcheroo is not enabled, i.e. no second client or no handler
- * 	has registered. Only used in vga_switcheroo_get_client_state() which
- * 	in turn is only called from hda_intel.c
  * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo.
  * 	Only used in vga_switcheroo_get_client_state() which in turn is only
  * 	called from hda_intel.c
@@ -53,7 +49,6 @@ enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
 	/* below are referred only from vga_switcheroo_get_client_state() */
-	VGA_SWITCHEROO_INIT,
 	VGA_SWITCHEROO_NOT_FOUND,
 };
 
-- 
cgit v1.2.3


From 8f25348b65cd073f77945f559ab1e5de83422cd1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 4 Nov 2015 14:59:06 +0100
Subject: net: add forgotten IFF_L3MDEV_SLAVE define

Fixes: fee6d4c77 ("net: Add netif_is_l3_slave")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4ac653b7b8ac..2c00772bd136 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1322,6 +1322,7 @@ enum netdev_priv_flags {
 #define IFF_L3MDEV_MASTER		IFF_L3MDEV_MASTER
 #define IFF_NO_QUEUE			IFF_NO_QUEUE
 #define IFF_OPENVSWITCH			IFF_OPENVSWITCH
+#define IFF_L3MDEV_SLAVE		IFF_L3MDEV_SLAVE
 
 /**
  *	struct net_device - The DEVICE structure.
-- 
cgit v1.2.3


From 805c4bc05705fb2b71ec970960b456eee9900953 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 5 Nov 2015 11:07:13 -0800
Subject: tcp: fix req->saved_syn race

For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
ireq->pktopts race"), we need to make sure we do not access
req->saved_syn unless we own the request sock.

This fixes races for listeners using TCP_SAVE_SYN option.

Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Ying Cai <ycai@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  7 +++++++
 net/ipv4/tcp_ipv4.c      |  2 ++
 net/ipv4/tcp_minisocks.c |  3 ---
 net/ipv6/tcp_ipv6.c      | 20 ++++++++++++--------
 4 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c906f4534581..b386361ba3e8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -397,6 +397,13 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
 	queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
 }
 
+static inline void tcp_move_syn(struct tcp_sock *tp,
+				struct request_sock *req)
+{
+	tp->saved_syn = req->saved_syn;
+	req->saved_syn = NULL;
+}
+
 static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 {
 	kfree(tp->saved_syn);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1c2648bbac4b..59aff63b1776 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1326,6 +1326,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+	if (*own_req)
+		tcp_move_syn(newtp, req_unhash);
 
 	return newsk;
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3575dd1e5b67..ac6b1961ffeb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -551,9 +551,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->rack.mstamp.v64 = 0;
 		newtp->rack.advanced = 0;
 
-		newtp->saved_syn = req->saved_syn;
-		req->saved_syn = NULL;
-
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
 	return newsk;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ea2f4d5440b5..c509e5562429 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1140,14 +1140,18 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		goto out;
 	}
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-	/* Clone pktoptions received with SYN, if we own the req */
-	if (*own_req && ireq->pktopts) {
-		newnp->pktoptions = skb_clone(ireq->pktopts,
-					      sk_gfp_atomic(sk, GFP_ATOMIC));
-		consume_skb(ireq->pktopts);
-		ireq->pktopts = NULL;
-		if (newnp->pktoptions)
-			skb_set_owner_r(newnp->pktoptions, newsk);
+	if (*own_req) {
+		tcp_move_syn(newtp, req_unhash);
+
+		/* Clone pktoptions received with SYN, if we own the req */
+		if (ireq->pktopts) {
+			newnp->pktoptions = skb_clone(ireq->pktopts,
+						      sk_gfp_atomic(sk, GFP_ATOMIC));
+			consume_skb(ireq->pktopts);
+			ireq->pktopts = NULL;
+			if (newnp->pktoptions)
+				skb_set_owner_r(newnp->pktoptions, newsk);
+		}
 	}
 
 	return newsk;
-- 
cgit v1.2.3


From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 23 Oct 2015 12:43:18 +0200
Subject: netfilter: ingress: don't use nf_hook_list_active

nf_hook_list_active() always returns true once at least one device has
NF_INGRESS hook enabled.

Thus, don't use this function. Instead, invert the test and use the static
key to elide the list_empty() test if no NF_INGRESS hooks are active.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 187feabe557c..ba7ce8805fe3 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -5,10 +5,13 @@
 #include <linux/netdevice.h>
 
 #ifdef CONFIG_NETFILTER_INGRESS
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
 {
-	return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
-				   NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+#ifdef HAVE_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+		return false;
+#endif
+	return !list_empty(&skb->dev->nf_hooks_ingress);
 }
 
 static inline int nf_hook_ingress(struct sk_buff *skb)
-- 
cgit v1.2.3


From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 6 Nov 2015 18:35:57 +0100
Subject: netfilter: ingress: fix wrong input interface on hook

The input and output interfaces in nf_hook_state_init() are flipped.
This fixes iif matching on nftables.

Reported-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index ba7ce8805fe3..5fcd375ef175 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	struct nf_hook_state state;
 
 	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
-			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
-			   skb->dev, NULL, dev_net(skb->dev), NULL);
+			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
+			   skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
 }
 
-- 
cgit v1.2.3


From 1b9863c6aa56d92126ec0d5c42eae25df52b7ca1 Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 28 Oct 2015 15:50:47 -0700
Subject: device property: Introducing enum dev_dma_attr

A device could have one of the following DMA attributes:
    * DMA not supported
    * DMA non-coherent
    * DMA coherent

So, this patch introduces enum dev_dma_attr. This will be used by
new APIs introduced in later patches.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index 463de52fe891..8eecf200bae5 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -27,6 +27,12 @@ enum dev_prop_type {
 	DEV_PROP_MAX,
 };
 
+enum dev_dma_attr {
+	DEV_DMA_NOT_SUPPORTED,
+	DEV_DMA_NON_COHERENT,
+	DEV_DMA_COHERENT,
+};
+
 bool device_property_present(struct device *dev, const char *propname);
 int device_property_read_u8_array(struct device *dev, const char *propname,
 				  u8 *val, size_t nval);
-- 
cgit v1.2.3


From b84f196d963c3159329f72ca1913b08679004a43 Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 28 Oct 2015 15:50:48 -0700
Subject: ACPI: Adding DMA Attribute APIs for ACPI Device

Adding acpi_get_dma_attr() to query DMA attributes of ACPI devices.
It returns the enum dev_dma_attr, which communicates DMA information
more clearly. This API replaces acpi_check_dma(), which will be
removed in a subsequent patch.

This patch also provides a convenient function, acpi_dma_supported(),
to check DMA support of the specified ACPI device.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c     | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/acpi/acpi_bus.h |  3 +++
 include/linux/acpi.h    | 10 ++++++++++
 3 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d1ce377db3e9..ed3d76fadccf 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1308,6 +1308,48 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp)
 	kfree(pnp->unique_id);
 }
 
+/**
+ * acpi_dma_supported - Check DMA support for the specified device.
+ * @adev: The pointer to acpi device
+ *
+ * Return false if DMA is not supported. Otherwise, return true
+ */
+bool acpi_dma_supported(struct acpi_device *adev)
+{
+	if (!adev)
+		return false;
+
+	if (adev->flags.cca_seen)
+		return true;
+
+	/*
+	* Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent
+	* DMA on "Intel platforms".  Presumably that includes all x86 and
+	* ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y.
+	*/
+	if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED))
+		return true;
+
+	return false;
+}
+
+/**
+ * acpi_get_dma_attr - Check the supported DMA attr for the specified device.
+ * @adev: The pointer to acpi device
+ *
+ * Return enum dev_dma_attr.
+ */
+enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
+{
+	if (!acpi_dma_supported(adev))
+		return DEV_DMA_NOT_SUPPORTED;
+
+	if (adev->flags.coherent_dma)
+		return DEV_DMA_COHERENT;
+	else
+		return DEV_DMA_NON_COHERENT;
+}
+
 static void acpi_init_coherency(struct acpi_device *adev)
 {
 	unsigned long long cca = 0;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 8df990520304..e56e6520edce 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -596,6 +596,9 @@ struct acpi_pci_root {
 
 /* helper */
 
+bool acpi_dma_supported(struct acpi_device *adev);
+enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
 					   u64 address, bool check_children);
 int acpi_is_root_bridge(acpi_handle);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 496265b0f527..292af3b69ede 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -579,6 +579,16 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
 	return false;
 }
 
+static inline bool acpi_dma_supported(struct acpi_device *adev)
+{
+	return false;
+}
+
+static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev)
+{
+	return DEV_DMA_NOT_SUPPORTED;
+}
+
 #define ACPI_PTR(_ptr)	(NULL)
 
 #endif	/* !CONFIG_ACPI */
-- 
cgit v1.2.3


From e5e558644bbb23cad03c586703331b8bcd9e0e6c Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 28 Oct 2015 15:50:49 -0700
Subject: device property: Adding DMA Attribute APIs for Generic Devices

The function device_dma_is_coherent() does not sufficiently
communicate device DMA attributes. Instead, this patch introduces
device_get_dma_attr(), which returns enum dev_dma_attr.
It replaces acpi_check_dma(), which will be removed in a
subsequent patch.

This also provides a convenient function, device_dma_supported(),
to check DMA support of the specified device.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 29 +++++++++++++++++++++++++++++
 include/linux/property.h |  4 ++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index de40623bbd8a..05d57a2afa05 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -611,6 +611,35 @@ bool device_dma_is_coherent(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_dma_is_coherent);
 
+bool device_dma_supported(struct device *dev)
+{
+	/* For DT, this is always supported.
+	 * For ACPI, this depends on CCA, which
+	 * is determined by the acpi_dma_supported().
+	 */
+	if (IS_ENABLED(CONFIG_OF) && dev->of_node)
+		return true;
+
+	return acpi_dma_supported(ACPI_COMPANION(dev));
+}
+EXPORT_SYMBOL_GPL(device_dma_supported);
+
+enum dev_dma_attr device_get_dma_attr(struct device *dev)
+{
+	enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED;
+
+	if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
+		if (of_dma_is_coherent(dev->of_node))
+			attr = DEV_DMA_COHERENT;
+		else
+			attr = DEV_DMA_NON_COHERENT;
+	} else
+		attr = acpi_get_dma_attr(ACPI_COMPANION(dev));
+
+	return attr;
+}
+EXPORT_SYMBOL_GPL(device_get_dma_attr);
+
 /**
  * device_get_phy_mode - Get phy mode for given device
  * @dev:	Pointer to the given device
diff --git a/include/linux/property.h b/include/linux/property.h
index 8eecf200bae5..7200490b7e6f 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -176,6 +176,10 @@ void device_add_property_set(struct device *dev, struct property_set *pset);
 
 bool device_dma_is_coherent(struct device *dev);
 
+bool device_dma_supported(struct device *dev);
+
+enum dev_dma_attr device_get_dma_attr(struct device *dev);
+
 int device_get_phy_mode(struct device *dev);
 
 void *device_get_mac_address(struct device *dev, char *addr, int alen);
-- 
cgit v1.2.3


From ab3d527329f01dd63dc852041006d1a24895d116 Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 28 Oct 2015 15:50:51 -0700
Subject: device property: ACPI: Remove unused DMA APIs

These DMA APIs are replaced with the newer versions, which return
the enum dev_dma_attr. So, we can safely remove them.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 13 -------------
 include/acpi/acpi_bus.h  | 34 ----------------------------------
 include/linux/acpi.h     |  5 -----
 include/linux/property.h |  2 --
 4 files changed, 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 05d57a2afa05..1325ff225cc4 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -598,19 +598,6 @@ unsigned int device_get_child_node_count(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_get_child_node_count);
 
-bool device_dma_is_coherent(struct device *dev)
-{
-	bool coherent = false;
-
-	if (IS_ENABLED(CONFIG_OF) && dev->of_node)
-		coherent = of_dma_is_coherent(dev->of_node);
-	else
-		acpi_check_dma(ACPI_COMPANION(dev), &coherent);
-
-	return coherent;
-}
-EXPORT_SYMBOL_GPL(device_dma_is_coherent);
-
 bool device_dma_supported(struct device *dev)
 {
 	/* For DT, this is always supported.
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index e56e6520edce..e45d58d6b0a7 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -390,40 +390,6 @@ struct acpi_data_node {
 	struct completion kobj_done;
 };
 
-static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
-{
-	bool ret = false;
-
-	if (!adev)
-		return ret;
-
-	/**
-	 * Currently, we only support _CCA=1 (i.e. coherent_dma=1)
-	 * This should be equivalent to specifyig dma-coherent for
-	 * a device in OF.
-	 *
-	 * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1),
-	 * There are two cases:
-	 * case 1. Do not support and disable DMA.
-	 * case 2. Support but rely on arch-specific cache maintenance for
-	 *         non-coherence DMA operations.
-	 * Currently, we implement case 2 above.
-	 *
-	 * For the case when _CCA is missing (i.e. cca_seen=0) and
-	 * platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
-	 * and fallback to arch-specific default handling.
-	 *
-	 * See acpi_init_coherency() for more info.
-	 */
-	if (adev->flags.coherent_dma ||
-	    (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) {
-		ret = true;
-		if (coherent)
-			*coherent = adev->flags.coherent_dma;
-	}
-	return ret;
-}
-
 static inline bool is_acpi_node(struct fwnode_handle *fwnode)
 {
 	return fwnode && (fwnode->type == FWNODE_ACPI
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 292af3b69ede..b5868300df75 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -574,11 +574,6 @@ static inline int acpi_device_modalias(struct device *dev,
 	return -ENODEV;
 }
 
-static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent)
-{
-	return false;
-}
-
 static inline bool acpi_dma_supported(struct acpi_device *adev)
 {
 	return false;
diff --git a/include/linux/property.h b/include/linux/property.h
index 7200490b7e6f..0a3705a7c9f2 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -174,8 +174,6 @@ struct property_set {
 
 void device_add_property_set(struct device *dev, struct property_set *pset);
 
-bool device_dma_is_coherent(struct device *dev);
-
 bool device_dma_supported(struct device *dev);
 
 enum dev_dma_attr device_get_dma_attr(struct device *dev);
-- 
cgit v1.2.3


From 50230713b63941f4b6b562eea0834f751aa0801e Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Wed, 28 Oct 2015 15:50:53 -0700
Subject: PCI: OF: Move of_pci_dma_configure() to pci_dma_configure()

This patch moves of_pci_dma_configure() to a more generic
pci_dma_configure(), which can be extended by non-OF code (e.g. ACPI).

There is no functional change.

Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Acked-by: Rob Herring <robh+dt@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/of/of_pci.c    | 19 -------------------
 drivers/pci/probe.c    | 22 +++++++++++++++++++++-
 include/linux/of_pci.h |  3 ---
 3 files changed, 21 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index a2f510cc683b..b66ee4ebf650 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -117,25 +117,6 @@ int of_get_pci_domain_nr(struct device_node *node)
 }
 EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
 
-/**
- * of_pci_dma_configure - Setup DMA configuration
- * @dev: ptr to pci_dev struct of the PCI device
- *
- * Function to update PCI devices's DMA configuration using the same
- * info from the OF node of host bridge's parent (if any).
- */
-void of_pci_dma_configure(struct pci_dev *pci_dev)
-{
-	struct device *dev = &pci_dev->dev;
-	struct device *bridge = pci_get_host_bridge_device(pci_dev);
-
-	if (bridge->parent)
-		of_dma_configure(dev, bridge->parent->of_node);
-
-	pci_put_host_bridge_device(bridge);
-}
-EXPORT_SYMBOL_GPL(of_pci_dma_configure);
-
 #if defined(CONFIG_OF_ADDRESS)
 /**
  * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8361d27e5eca..d35f83d80b15 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -6,6 +6,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/of_device.h>
 #include <linux/of_pci.h>
 #include <linux/pci_hotplug.h>
 #include <linux/slab.h>
@@ -1633,6 +1634,25 @@ static void pci_set_msi_domain(struct pci_dev *dev)
 				   dev_get_msi_domain(&dev->bus->dev));
 }
 
+/**
+ * pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to pci_dev struct of the PCI device
+ *
+ * Function to update PCI devices's DMA configuration using the same
+ * info from the OF node of host bridge's parent (if any).
+ */
+static void pci_dma_configure(struct pci_dev *dev)
+{
+	struct device *bridge = pci_get_host_bridge_device(dev);
+
+	if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) {
+		if (bridge->parent)
+			of_dma_configure(&dev->dev, bridge->parent->of_node);
+	}
+
+	pci_put_host_bridge_device(bridge);
+}
+
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 {
 	int ret;
@@ -1646,7 +1666,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
-	of_pci_dma_configure(dev);
+	pci_dma_configure(dev);
 
 	pci_set_dma_max_seg_size(dev, 65536);
 	pci_set_dma_seg_boundary(dev, 0xffffffff);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 29fd3fe1c035..ce0e5abeb454 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np);
 int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
-void of_pci_dma_configure(struct pci_dev *pci_dev);
 #else
 static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
@@ -51,8 +50,6 @@ of_get_pci_domain_nr(struct device_node *node)
 {
 	return -1;
 }
-
-static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { }
 #endif
 
 #if defined(CONFIG_OF_ADDRESS)
-- 
cgit v1.2.3


From e2b19197ff9dc46f3e3888f273c4395f9e5a9856 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:09 -0800
Subject: mm, page_alloc: remove unnecessary parameter from
 zone_watermark_ok_safe

Overall, the intent of this series is to remove the zonelist cache which
was introduced to avoid high overhead in the page allocator.  Once this is
done, it is necessary to reduce the cost of watermark checks.

The series starts with minor micro-optimisations.

Next it notes that GFP flags that affect watermark checks are abused.
__GFP_WAIT historically identified callers that could not sleep and could
access reserves.  This was later abused to identify callers that simply
prefer to avoid sleeping and have other options.  A patch distinguishes
between atomic callers, high-priority callers and those that simply wish
to avoid sleep.

The zonelist cache has been around for a long time but it is of dubious
merit with a lot of complexity and some issues that are explained.  The
most important issue is that a failed THP allocation can cause a zone to
be treated as "full".  This potentially causes unnecessary stalls, reclaim
activity or remote fallbacks.  The issues could be fixed but it's not
worth it.  The series places a small number of other micro-optimisations
on top before examining GFP flags watermarks.

High-order watermarks enforcement can cause high-order allocations to fail
even though pages are free.  The watermark checks both protect high-order
atomic allocations and make kswapd aware of high-order pages but there is
a much better way that can be handled using migrate types.  This series
uses page grouping by mobility to reserve pageblocks for high-order
allocations with the size of the reservation depending on demand.  kswapd
awareness is maintained by examining the free lists.  By patch 12 in this
series, there are no high-order watermark checks while preserving the
properties that motivated the introduction of the watermark checks.

This patch (of 10):

No user of zone_watermark_ok_safe() specifies alloc_flags.  This patch
removes the unnecessary parameter.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 mm/page_alloc.c        | 5 +++--
 mm/vmscan.c            | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2d7e660cdefe..e326843c995a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -817,7 +817,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
 bool zone_watermark_ok(struct zone *z, unsigned int order,
 		unsigned long mark, int classzone_idx, int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-		unsigned long mark, int classzone_idx, int alloc_flags);
+		unsigned long mark, int classzone_idx);
 enum memmap_context {
 	MEMMAP_EARLY,
 	MEMMAP_HOTPLUG,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 446bb36ee59d..d73c346d91b3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2249,6 +2249,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
+
 #ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
 	if (!(alloc_flags & ALLOC_CMA))
@@ -2278,14 +2279,14 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 }
 
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-			unsigned long mark, int classzone_idx, int alloc_flags)
+			unsigned long mark, int classzone_idx)
 {
 	long free_pages = zone_page_state(z, NR_FREE_PAGES);
 
 	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
 		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+	return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
 								free_pages);
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 55721b619aee..e0cd7eed4e38 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2477,7 +2477,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
 	balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
 			zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
 	watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
-	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0);
 
 	/*
 	 * If compaction is deferred, reclaim up to a point where
@@ -2960,7 +2960,7 @@ static bool zone_balanced(struct zone *zone, int order,
 			  unsigned long balance_gap, int classzone_idx)
 {
 	if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) +
-				    balance_gap, classzone_idx, 0))
+				    balance_gap, classzone_idx))
 		return false;
 
 	if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
-- 
cgit v1.2.3


From 46e700abc44ce215acb4341d9702ce3972eda571 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:15 -0800
Subject: mm, page_alloc: remove unnecessary taking of a seqlock when cpusets
 are disabled

There is a seqcounter that protects against spurious allocation failures
when a task is changing the allowed nodes in a cpuset.  There is no need
to check the seqcounter until a cpuset exists.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5a1311942358..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void);
  */
 static inline unsigned int read_mems_allowed_begin(void)
 {
+	if (!cpusets_enabled())
+		return 0;
+
 	return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
@@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void)
  */
 static inline bool read_mems_allowed_retry(unsigned int seq)
 {
+	if (!cpusets_enabled())
+		return false;
+
 	return read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
-- 
cgit v1.2.3


From 016c13daa5c9e4827eca703e2f0621c131f2cca3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:18 -0800
Subject: mm, page_alloc: use masks and shifts when converting GFP flags to
 migrate types

This patch redefines which GFP bits are used for specifying mobility and
the order of the migrate types.  Once redefined it's possible to convert
GFP flags to a migrate type with a simple mask and shift.  The only
downside is that readers of OOM kill messages and allocation failures may
be used to the existing values, but scripts/gfp-translate will help.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    | 12 +++++++-----
 include/linux/mmzone.h |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f92cbd2f4450..440fca3e7e5d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,7 +14,7 @@ struct vm_area_struct;
 #define ___GFP_HIGHMEM		0x02u
 #define ___GFP_DMA32		0x04u
 #define ___GFP_MOVABLE		0x08u
-#define ___GFP_WAIT		0x10u
+#define ___GFP_RECLAIMABLE	0x10u
 #define ___GFP_HIGH		0x20u
 #define ___GFP_IO		0x40u
 #define ___GFP_FS		0x80u
@@ -29,7 +29,7 @@ struct vm_area_struct;
 #define ___GFP_NOMEMALLOC	0x10000u
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
-#define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_WAIT		0x80000u
 #define ___GFP_NOACCOUNT	0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
@@ -126,6 +126,7 @@ struct vm_area_struct;
 
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+#define GFP_MOVABLE_SHIFT 3
 
 /* Control page allocator reclaim behavior */
 #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
@@ -152,14 +153,15 @@ struct vm_area_struct;
 /* Convert GFP flags to their corresponding migrate type */
 static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 {
-	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
+	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
 
 	if (unlikely(page_group_by_mobility_disabled))
 		return MIGRATE_UNMOVABLE;
 
 	/* Group based on mobility */
-	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
-		((gfp_flags & __GFP_RECLAIMABLE) != 0);
+	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
 }
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e326843c995a..38bed71758ab 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,8 +37,8 @@
 
 enum {
 	MIGRATE_UNMOVABLE,
-	MIGRATE_RECLAIMABLE,
 	MIGRATE_MOVABLE,
+	MIGRATE_RECLAIMABLE,
 	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
 	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
-- 
cgit v1.2.3


From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:21 -0800
Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling
 to sleep and avoiding waking kswapd

__GFP_WAIT has been used to identify atomic context in callers that hold
spinlocks or are in interrupts.  They are expected to be high priority and
have access to one of two watermarks lower than "min" which can be referred
to as the "atomic reserve".  __GFP_HIGH users get access to the first
lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options
were available.  Some have abused __GFP_WAIT leading to a situation where
an optimistic allocation with a fallback option can access atomic
reserves.

This patch uses __GFP_ATOMIC to identify callers that are truly atomic,
cannot sleep and have no alternative.  High priority users continue to use
__GFP_HIGH.  __GFP_DIRECT_RECLAIM identifies callers that can sleep and
are willing to enter direct reclaim.  __GFP_KSWAPD_RECLAIM identifies
callers that want to wake kswapd for background reclaim.  __GFP_WAIT is
redefined as a caller that is willing to enter direct reclaim and wake
kswapd for background reclaim.

This patch then converts a number of sites:

o __GFP_ATOMIC is used by callers that are high priority and have memory
  pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear
  __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall
  into this category where kswapd will still be woken but atomic reserves
  are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the
  helper gfpflags_allow_blocking() where possible. This is because
  checking for __GFP_WAIT as was done historically now can trigger false
  positives. Some exceptions like dm-crypt.c exist where the code intent
  is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to
  flag manipulations.

o Callers that built their own GFP flags instead of starting with GFP_KERNEL
  and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT
and were depending on access to atomic reserves for inconspicuous reasons.
In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of
GFP flags instead of starting with something like GFP_KERNEL.  They may
now wish to specify __GFP_KSWAPD_RECLAIM.  It's almost certainly harmless
if it's missed in most cases as other activity will wake kswapd.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/balance                           | 14 ++++---
 arch/arm/mm/dma-mapping.c                          |  6 +--
 arch/arm/xen/mm.c                                  |  2 +-
 arch/arm64/mm/dma-mapping.c                        |  4 +-
 arch/x86/kernel/pci-dma.c                          |  2 +-
 block/bio.c                                        | 26 ++++++------
 block/blk-core.c                                   | 16 ++++----
 block/blk-ioc.c                                    |  2 +-
 block/blk-mq-tag.c                                 |  2 +-
 block/blk-mq.c                                     |  6 +--
 drivers/block/drbd/drbd_receiver.c                 |  3 +-
 drivers/block/osdblk.c                             |  2 +-
 drivers/connector/connector.c                      |  3 +-
 drivers/firewire/core-cdev.c                       |  2 +-
 drivers/gpu/drm/i915/i915_gem.c                    |  2 +-
 drivers/infiniband/core/sa_query.c                 |  2 +-
 drivers/iommu/amd_iommu.c                          |  2 +-
 drivers/iommu/intel-iommu.c                        |  2 +-
 drivers/md/dm-crypt.c                              |  6 +--
 drivers/md/dm-kcopyd.c                             |  2 +-
 drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c     |  2 +-
 drivers/media/pci/solo6x10/solo6x10-v4l2.c         |  2 +-
 drivers/media/pci/tw68/tw68-video.c                |  2 +-
 drivers/mtd/mtdcore.c                              |  3 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  2 +-
 drivers/staging/android/ion/ion_system_heap.c      |  2 +-
 .../lustre/include/linux/libcfs/libcfs_private.h   |  2 +-
 drivers/usb/host/u132-hcd.c                        |  2 +-
 drivers/video/fbdev/vermilion/vermilion.c          |  2 +-
 fs/btrfs/disk-io.c                                 |  2 +-
 fs/btrfs/extent_io.c                               | 14 +++----
 fs/btrfs/volumes.c                                 |  4 +-
 fs/ext4/super.c                                    |  2 +-
 fs/fscache/cookie.c                                |  2 +-
 fs/fscache/page.c                                  |  6 +--
 fs/jbd2/transaction.c                              |  4 +-
 fs/nfs/file.c                                      |  6 +--
 fs/xfs/xfs_qm.c                                    |  2 +-
 include/linux/gfp.h                                | 46 ++++++++++++++++------
 include/linux/skbuff.h                             |  6 +--
 include/net/sock.h                                 |  2 +-
 include/trace/events/gfpflags.h                    |  5 ++-
 kernel/audit.c                                     |  6 +--
 kernel/cgroup.c                                    |  2 +-
 kernel/locking/lockdep.c                           |  2 +-
 kernel/power/snapshot.c                            |  2 +-
 kernel/smp.c                                       |  2 +-
 lib/idr.c                                          |  4 +-
 lib/radix-tree.c                                   | 10 ++---
 mm/backing-dev.c                                   |  2 +-
 mm/dmapool.c                                       |  2 +-
 mm/memcontrol.c                                    |  6 +--
 mm/mempool.c                                       | 10 ++---
 mm/migrate.c                                       |  2 +-
 mm/page_alloc.c                                    | 43 ++++++++++++--------
 mm/slab.c                                          | 18 ++++-----
 mm/slub.c                                          | 10 ++---
 mm/vmalloc.c                                       |  2 +-
 mm/vmscan.c                                        |  4 +-
 mm/zswap.c                                         |  5 ++-
 net/core/skbuff.c                                  |  8 ++--
 net/core/sock.c                                    |  6 ++-
 net/netlink/af_netlink.c                           |  2 +-
 net/rds/ib_recv.c                                  |  4 +-
 net/rxrpc/ar-connection.c                          |  2 +-
 net/sctp/associola.c                               |  2 +-
 66 files changed, 210 insertions(+), 172 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index c46e68cf9344..964595481af6 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -1,12 +1,14 @@
 Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
 
-Memory balancing is needed for non __GFP_WAIT as well as for non
-__GFP_IO allocations.
+Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
 
-There are two reasons to be requesting non __GFP_WAIT allocations:
-the caller can not sleep (typically intr context), or does not want
-to incur cost overheads of page stealing and possible swap io for
-whatever reasons.
+The first reason why a caller may avoid reclaim is that the caller can not
+sleep due to holding a spinlock or is in interrupt context. The second may
+be that the caller is willing to fail the allocation without incurring the
+overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases,
+the caller may also wish to avoid waking kswapd.
 
 __GFP_IO allocation requests are made to prevent file system deadlocks.
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ad4eb2d26e16..e62400e5fb99 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -651,12 +651,12 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	if (nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT))
+	else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
 		addr = __alloc_from_contiguous(dev, size, prot, &page,
 					       caller, want_vaddr);
 	else if (is_coherent)
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (!(gfp & __GFP_WAIT))
+	else if (!gfpflags_allow_blocking(gfp))
 		addr = __alloc_from_pool(size, &page);
 	else
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
@@ -1363,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	*handle = DMA_ERROR_CODE;
 	size = PAGE_ALIGN(size);
 
-	if (!(gfp & __GFP_WAIT))
+	if (!gfpflags_allow_blocking(gfp))
 		return __iommu_alloc_atomic(dev, size, handle);
 
 	/*
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 7c34f7126b04..c5f9a9e3d1f3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -25,7 +25,7 @@
 unsigned long xen_get_swiotlb_free_pages(unsigned int order)
 {
 	struct memblock_region *reg;
-	gfp_t flags = __GFP_NOWARN;
+	gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
 
 	for_each_memblock(memory, reg) {
 		if (reg->base < (phys_addr_t)0xffffffff) {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6320361d8d4c..bb4bf6a06ad6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
 		flags |= GFP_DMA;
-	if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
+	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
 		struct page *page;
 		void *addr;
 
@@ -148,7 +148,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 
 	size = PAGE_ALIGN(size);
 
-	if (!coherent && !(flags & __GFP_WAIT)) {
+	if (!coherent && !gfpflags_allow_blocking(flags)) {
 		struct page *page = NULL;
 		void *addr = __alloc_from_pool(size, &page, flags);
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cd99433b8ba1..6ba014c61d62 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 again:
 	page = NULL;
 	/* CMA can be used only in the context which permits sleeping */
-	if (flag & __GFP_WAIT) {
+	if (gfpflags_allow_blocking(flag)) {
 		page = dma_alloc_from_contiguous(dev, count, get_order(size));
 		if (page && page_to_phys(page) + size > dma_mask) {
 			dma_release_from_contiguous(dev, page, count);
diff --git a/block/bio.c b/block/bio.c
index ad3f276d74bc..4f184d938942 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -211,7 +211,7 @@ fallback:
 		bvl = mempool_alloc(pool, gfp_mask);
 	} else {
 		struct biovec_slab *bvs = bvec_slabs + *idx;
-		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+		gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
 
 		/*
 		 * Make this allocation restricted and don't dump info on
@@ -221,11 +221,11 @@ fallback:
 		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 		/*
-		 * Try a slab allocation. If this fails and __GFP_WAIT
+		 * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
 		 * is set, retry with the 1-entry mempool
 		 */
 		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
-		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+		if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
 			*idx = BIOVEC_MAX_IDX;
 			goto fallback;
 		}
@@ -395,12 +395,12 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  *   If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
  *   backed by the @bs's mempool.
  *
- *   When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
- *   able to allocate a bio. This is due to the mempool guarantees. To make this
- *   work, callers must never allocate more than 1 bio at a time from this pool.
- *   Callers that need to allocate more than 1 bio must always submit the
- *   previously allocated bio for IO before attempting to allocate a new one.
- *   Failure to do so can cause deadlocks under memory pressure.
+ *   When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
+ *   always be able to allocate a bio. This is due to the mempool guarantees.
+ *   To make this work, callers must never allocate more than 1 bio at a time
+ *   from this pool. Callers that need to allocate more than 1 bio must always
+ *   submit the previously allocated bio for IO before attempting to allocate
+ *   a new one. Failure to do so can cause deadlocks under memory pressure.
  *
  *   Note that when running under generic_make_request() (i.e. any block
  *   driver), bios are not submitted until after you return - see the code in
@@ -459,13 +459,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		 * We solve this, and guarantee forward progress, with a rescuer
 		 * workqueue per bio_set. If we go to allocate and there are
 		 * bios on current->bio_list, we first try the allocation
-		 * without __GFP_WAIT; if that fails, we punt those bios we
-		 * would be blocking to the rescuer workqueue before we retry
-		 * with the original gfp_flags.
+		 * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
+		 * bios we would be blocking to the rescuer workqueue before
+		 * we retry with the original gfp_flags.
 		 */
 
 		if (current->bio_list && !bio_list_empty(current->bio_list))
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
 		if (!p && gfp_mask != saved_gfp) {
diff --git a/block/blk-core.c b/block/blk-core.c
index 89eec7965870..9e32f0868e36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1206,8 +1206,8 @@ rq_starved:
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
- * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
- * function keeps retrying under memory pressure and fails iff @q is dead.
+ * Get a free request from @q.  If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * this function keeps retrying under memory pressure and fails iff @q is dead.
  *
  * Must be called with @q->queue_lock held and,
  * Returns ERR_PTR on failure, with @q->queue_lock held.
@@ -1227,7 +1227,7 @@ retry:
 	if (!IS_ERR(rq))
 		return rq;
 
-	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
+	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return rq;
 	}
@@ -1305,11 +1305,11 @@ EXPORT_SYMBOL(blk_get_request);
  * BUG.
  *
  * WARNING: When allocating/cloning a bio-chain, careful consideration should be
- * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
- * anything but the first bio in the chain. Otherwise you risk waiting for IO
- * completion of a bio that hasn't been submitted yet, thus resulting in a
- * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
- * of bio_alloc(), as that avoids the mempool deadlock.
+ * given to how you allocate bios. In particular, you cannot use
+ * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
+ * you risk waiting for IO completion of a bio that hasn't been submitted yet,
+ * thus resulting in a deadlock. Alternatively bios should be allocated using
+ * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
  * If possible a big IO should be split into smaller parts when allocation
  * fails. Partial allocation should not be an error, or you risk a live-lock.
  */
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 1a27f45ec776..381cb50a673c 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,7 @@ struct io_context *get_task_io_context(struct task_struct *task,
 {
 	struct io_context *ioc;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	do {
 		task_lock(task);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 60ac684c8b8c..a07ca3488d96 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	if (tag != -1)
 		return tag;
 
-	if (!(data->gfp & __GFP_WAIT))
+	if (!gfpflags_allow_blocking(data->gfp))
 		return -1;
 
 	bs = bt_wait_ptr(bt, hctx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c27b3eaef64..68c0a3416b34 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -244,11 +244,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_WAIT,
+	blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
 			reserved, ctx, hctx);
 
 	rq = __blk_mq_alloc_request(&alloc_data, rw);
-	if (!rq && (gfp & __GFP_WAIT)) {
+	if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
 		__blk_mq_run_hw_queue(hctx);
 		blk_mq_put_ctx(ctx);
 
@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
 		blk_mq_set_alloc_data(&alloc_data, q,
-				__GFP_WAIT|GFP_ATOMIC, false, ctx, hctx);
+				__GFP_WAIT|__GFP_HIGH, false, ctx, hctx);
 		rq = __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 		hctx = alloc_data.hctx;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c097909c589c..b4b5680ac6ad 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -357,7 +357,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 	}
 
 	if (has_payload && data_size) {
-		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
+		page = drbd_alloc_pages(peer_device, nr_pages,
+					gfpflags_allow_blocking(gfp_mask));
 		if (!page)
 			goto fail;
 	}
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index e22942596207..1b709a4e3b5e 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -271,7 +271,7 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
 			goto err_out;
 
 		tmp->bi_bdev = NULL;
-		gfpmask &= ~__GFP_WAIT;
+		gfpmask &= ~__GFP_DIRECT_RECLAIM;
 		tmp->bi_next = NULL;
 
 		if (!new_chain)
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 30f522848c73..d7373ca69c99 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -124,7 +124,8 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
 	if (group)
 		return netlink_broadcast(dev->nls, skb, portid, group,
 					 gfp_mask);
-	return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT));
+	return netlink_unicast(dev->nls, skb, portid,
+			!gfpflags_allow_blocking(gfp_mask));
 }
 EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
 
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 2a3973a7c441..36a7c2d89a01 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -486,7 +486,7 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
 static int add_client_resource(struct client *client,
 			       struct client_resource *resource, gfp_t gfp_mask)
 {
-	bool preload = !!(gfp_mask & __GFP_WAIT);
+	bool preload = gfpflags_allow_blocking(gfp_mask);
 	unsigned long flags;
 	int ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d631a946481..d58cb9e034fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2215,7 +2215,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 */
 	mapping = file_inode(obj->base.filp)->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
-	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+	gfp |= __GFP_NORETRY | __GFP_NOWARN;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	sg = st->sgl;
 	st->nents = 0;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0..59ab264c99c4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1083,7 +1083,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 {
-	bool preload = !!(gfp_mask & __GFP_WAIT);
+	bool preload = gfpflags_allow_blocking(gfp_mask);
 	unsigned long flags;
 	int ret, id;
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0d533bba4ad1..8b2be1e7714f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2668,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 
 	page = alloc_pages(flag | __GFP_NOWARN,  get_order(size));
 	if (!page) {
-		if (!(flag & __GFP_WAIT))
+		if (!gfpflags_allow_blocking(flag))
 			return NULL;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7cf80c1a8a16..f1042daef9ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3647,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 			flags |= GFP_DMA32;
 	}
 
-	if (flags & __GFP_WAIT) {
+	if (gfpflags_allow_blocking(flags)) {
 		unsigned int count = size >> PAGE_SHIFT;
 
 		page = dma_alloc_from_contiguous(dev, count, order);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3729b394432c..917d47e290ae 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -994,7 +994,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 	struct bio_vec *bvec;
 
 retry:
-	if (unlikely(gfp_mask & __GFP_WAIT))
+	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
 		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
@@ -1010,7 +1010,7 @@ retry:
 		if (!page) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
-			gfp_mask |= __GFP_WAIT;
+			gfp_mask |= __GFP_DIRECT_RECLAIM;
 			goto retry;
 		}
 
@@ -1027,7 +1027,7 @@ retry:
 	}
 
 return_clone:
-	if (unlikely(gfp_mask & __GFP_WAIT))
+	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
 		mutex_unlock(&cc->bio_alloc_lock);
 
 	return clone;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3a7cade5e27d..1452ed9aacb4 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -244,7 +244,7 @@ static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
 	*pages = NULL;
 
 	do {
-		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY);
+		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
 		if (unlikely(!pl)) {
 			/* Use reserved pages */
 			pl = kc->pages;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
index 1bd2fd47421f..4432fd69b7cb 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
@@ -1297,7 +1297,7 @@ static struct solo_enc_dev *solo_enc_alloc(struct solo_dev *solo_dev,
 	solo_enc->vidq.ops = &solo_enc_video_qops;
 	solo_enc->vidq.mem_ops = &vb2_dma_sg_memops;
 	solo_enc->vidq.drv_priv = solo_enc;
-	solo_enc->vidq.gfp_flags = __GFP_DMA32;
+	solo_enc->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
 	solo_enc->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
 	solo_enc->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
 	solo_enc->vidq.lock = &solo_enc->lock;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
index 26df903585d7..f7ce493b1fee 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
@@ -678,7 +678,7 @@ int solo_v4l2_init(struct solo_dev *solo_dev, unsigned nr)
 	solo_dev->vidq.mem_ops = &vb2_dma_contig_memops;
 	solo_dev->vidq.drv_priv = solo_dev;
 	solo_dev->vidq.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	solo_dev->vidq.gfp_flags = __GFP_DMA32;
+	solo_dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
 	solo_dev->vidq.buf_struct_size = sizeof(struct solo_vb2_buf);
 	solo_dev->vidq.lock = &solo_dev->lock;
 	ret = vb2_queue_init(&solo_dev->vidq);
diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
index 4c3293dcddbc..46642ef9151b 100644
--- a/drivers/media/pci/tw68/tw68-video.c
+++ b/drivers/media/pci/tw68/tw68-video.c
@@ -979,7 +979,7 @@ int tw68_video_init2(struct tw68_dev *dev, int video_nr)
 	dev->vidq.ops = &tw68_video_qops;
 	dev->vidq.mem_ops = &vb2_dma_sg_memops;
 	dev->vidq.drv_priv = dev;
-	dev->vidq.gfp_flags = __GFP_DMA32;
+	dev->vidq.gfp_flags = __GFP_DMA32 | __GFP_KSWAPD_RECLAIM;
 	dev->vidq.buf_struct_size = sizeof(struct tw68_buf);
 	dev->vidq.lock = &dev->lock;
 	dev->vidq.min_buffers_needed = 2;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 8bbbb751bf45..2dfb291a47c6 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1188,8 +1188,7 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  */
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
 {
-	gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
-		       __GFP_NORETRY | __GFP_NO_KSWAPD;
+	gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY;
 	size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
 	void *kbuf;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 44173be5cbf0..f8d7a2f06950 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -691,7 +691,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
 {
 	if (fp->rx_frag_size) {
 		/* GFP_KERNEL allocations are used only during initialization */
-		if (unlikely(gfp_mask & __GFP_WAIT))
+		if (unlikely(gfpflags_allow_blocking(gfp_mask)))
 			return (void *)__get_free_page(gfp_mask);
 
 		return netdev_alloc_frag(fp->rx_frag_size);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index ada724aab3d5..d4c3e5512dd5 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -27,7 +27,7 @@
 #include "ion_priv.h"
 
 static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
-				     __GFP_NORETRY) & ~__GFP_WAIT;
+				     __GFP_NORETRY) & ~__GFP_DIRECT_RECLAIM;
 static gfp_t low_order_gfp_flags  = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
 static const unsigned int orders[] = {8, 4, 0};
 static const int num_orders = ARRAY_SIZE(orders);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 6af733de69ca..f0b0423a716b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -95,7 +95,7 @@ do {								    \
 do {									    \
 	LASSERT(!in_interrupt() ||					    \
 		((size) <= LIBCFS_VMALLOC_SIZE &&			    \
-		 ((mask) & __GFP_WAIT) == 0));				    \
+		 !gfpflags_allow_blocking(mask)));			    \
 } while (0)
 
 #define LIBCFS_ALLOC_POST(ptr, size)					    \
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 0a94895a358d..692ccc69345e 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2244,7 +2244,7 @@ static int u132_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 {
 	struct u132 *u132 = hcd_to_u132(hcd);
 	if (irqs_disabled()) {
-		if (__GFP_WAIT & mem_flags) {
+		if (gfpflags_allow_blocking(mem_flags)) {
 			printk(KERN_ERR "invalid context for function that might sleep\n");
 			return -EINVAL;
 		}
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6b70d7f62b2f..1c1e95a0b8fa 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -99,7 +99,7 @@ static int vmlfb_alloc_vram_area(struct vram_area *va, unsigned max_order,
 		 * below the first 16MB.
 		 */
 
-		flags = __GFP_DMA | __GFP_HIGH;
+		flags = __GFP_DMA | __GFP_HIGH | __GFP_KSWAPD_RECLAIM;
 		va->logical =
 			 __get_free_pages(flags, --max_order);
 	} while (va->logical == 0 && max_order > min_order);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1e60d00d4ea7..c339d561e596 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2572,7 +2572,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 	fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
 	/* readahead state */
-	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
+	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	spin_lock_init(&fs_info->reada_lock);
 
 	fs_info->thread_pool_size = min_t(unsigned long,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3915c9473e94..032abfbebe76 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -594,7 +594,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
 		clear = 1;
 again:
-	if (!prealloc && (mask & __GFP_WAIT)) {
+	if (!prealloc && gfpflags_allow_blocking(mask)) {
 		/*
 		 * Don't care for allocation failure here because we might end
 		 * up not needing the pre-allocated extent state at all, which
@@ -718,7 +718,7 @@ search_again:
 	if (start > end)
 		goto out;
 	spin_unlock(&tree->lock);
-	if (mask & __GFP_WAIT)
+	if (gfpflags_allow_blocking(mask))
 		cond_resched();
 	goto again;
 }
@@ -850,7 +850,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 
 	bits |= EXTENT_FIRST_DELALLOC;
 again:
-	if (!prealloc && (mask & __GFP_WAIT)) {
+	if (!prealloc && gfpflags_allow_blocking(mask)) {
 		prealloc = alloc_extent_state(mask);
 		BUG_ON(!prealloc);
 	}
@@ -1028,7 +1028,7 @@ search_again:
 	if (start > end)
 		goto out;
 	spin_unlock(&tree->lock);
-	if (mask & __GFP_WAIT)
+	if (gfpflags_allow_blocking(mask))
 		cond_resched();
 	goto again;
 }
@@ -1076,7 +1076,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	btrfs_debug_check_extent_io_range(tree, start, end);
 
 again:
-	if (!prealloc && (mask & __GFP_WAIT)) {
+	if (!prealloc && gfpflags_allow_blocking(mask)) {
 		/*
 		 * Best effort, don't worry if extent state allocation fails
 		 * here for the first iteration. We might have a cached state
@@ -1253,7 +1253,7 @@ search_again:
 	if (start > end)
 		goto out;
 	spin_unlock(&tree->lock);
-	if (mask & __GFP_WAIT)
+	if (gfpflags_allow_blocking(mask))
 		cond_resched();
 	first_iteration = false;
 	goto again;
@@ -4319,7 +4319,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_CACHE_SIZE - 1;
 
-	if ((mask & __GFP_WAIT) &&
+	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > 16 * 1024 * 1024) {
 		u64 len;
 		while (start <= end) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6fc735869c18..e023919b4470 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -156,8 +156,8 @@ static struct btrfs_device *__alloc_device(void)
 	spin_lock_init(&dev->reada_lock);
 	atomic_set(&dev->reada_in_flight, 0);
 	atomic_set(&dev->dev_stats_ccnt, 0);
-	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
 	return dev;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a63c7b0a10cf..49f6c78ee3af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1058,7 +1058,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 		return 0;
 	if (journal)
 		return jbd2_journal_try_to_free_buffers(journal, page,
-							wait & ~__GFP_WAIT);
+						wait & ~__GFP_DIRECT_RECLAIM);
 	return try_to_free_buffers(page);
 }
 
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d403c69bee08..4304072161aa 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
 
 	/* radix tree insertion won't use the preallocation pool unless it's
 	 * told it may not wait */
-	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT);
+	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
 	switch (cookie->def->type) {
 	case FSCACHE_COOKIE_TYPE_INDEX:
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 483bbc613bf0..79483b3d8c6f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page)
 
 /*
  * decide whether a page can be released, possibly by cancelling a store to it
- * - we're allowed to sleep if __GFP_WAIT is flagged
+ * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged
  */
 bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 				  struct page *page,
@@ -122,7 +122,7 @@ page_busy:
 	 * allocator as the work threads writing to the cache may all end up
 	 * sleeping on memory allocation, so we may need to impose a timeout
 	 * too. */
-	if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) {
+	if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) {
 		fscache_stat(&fscache_n_store_vmscan_busy);
 		return false;
 	}
@@ -132,7 +132,7 @@ page_busy:
 		_debug("fscache writeout timeout page: %p{%lx}",
 			page, page->index);
 
-	gfp &= ~__GFP_WAIT;
+	gfp &= ~__GFP_DIRECT_RECLAIM;
 	goto try_again;
 }
 EXPORT_SYMBOL(__fscache_maybe_release_page);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6b8338ec2464..89463eee6791 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1937,8 +1937,8 @@ out:
  * @journal: journal for operation
  * @page: to try and free
  * @gfp_mask: we use the mask to detect how hard should we try to release
- * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
- * release the buffers.
+ * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS are set, we wait for commit
+ * code to release the buffers.
  *
  *
  * For all the buffers on this page,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 37f639d50af5..93e236429c5d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
 	/* Always try to initiate a 'commit' if relevant, but only
-	 * wait for it if __GFP_WAIT is set.  Even then, only wait 1
-	 * second and only if the 'bdi' is not congested.
+	 * wait for it if the caller allows blocking.  Even then,
+	 * only wait 1 second and only if the 'bdi' is not congested.
 	 * Waiting indefinitely can cause deadlocks when the NFS
 	 * server is on this machine, when a new TCP connection is
 	 * needed and in other rare cases.  There is no particular
@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 	if (mapping) {
 		struct nfs_server *nfss = NFS_SERVER(mapping->host);
 		nfs_commit_inode(mapping->host, 0);
-		if ((gfp & __GFP_WAIT) &&
+		if (gfpflags_allow_blocking(gfp) &&
 		    !bdi_write_congested(&nfss->backing_dev_info)) {
 			wait_on_page_bit_killable_timeout(page, PG_private,
 							  HZ);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index eac9549efd52..587174fd4f2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -525,7 +525,7 @@ xfs_qm_shrink_scan(
 	unsigned long		freed;
 	int			error;
 
-	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
+	if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
 		return 0;
 
 	INIT_LIST_HEAD(&isol.buffers);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 440fca3e7e5d..b56e811b6f7c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -29,12 +29,13 @@ struct vm_area_struct;
 #define ___GFP_NOMEMALLOC	0x10000u
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
-#define ___GFP_WAIT		0x80000u
+#define ___GFP_ATOMIC		0x80000u
 #define ___GFP_NOACCOUNT	0x100000u
 #define ___GFP_NOTRACK		0x200000u
-#define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_DIRECT_RECLAIM	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
 #define ___GFP_WRITE		0x1000000u
+#define ___GFP_KSWAPD_RECLAIM	0x2000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -71,7 +72,7 @@ struct vm_area_struct;
  * __GFP_MOVABLE: Flag that this page will be movable by the page migration
  * mechanism or reclaimed
  */
-#define __GFP_WAIT	((__force gfp_t)___GFP_WAIT)	/* Can wait and reschedule? */
+#define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)  /* Caller cannot wait or reschedule */
 #define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)	/* Should access emergency pools? */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)	/* Can start physical IO? */
 #define __GFP_FS	((__force gfp_t)___GFP_FS)	/* Can call down to low-level FS? */
@@ -94,23 +95,37 @@ struct vm_area_struct;
 #define __GFP_NOACCOUNT	((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
-#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
+/*
+ * A caller that is willing to wait may enter direct reclaim and will
+ * wake kswapd to reclaim pages in the background until the high
+ * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to
+ * avoid unnecessary delays when a fallback option is available but
+ * still allow kswapd to reclaim in the background. The kswapd flag
+ * can be cleared when the reclaiming of pages would cause unnecessary
+ * disruption.
+ */
+#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
+#define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
  * allocations that simply cannot be supported (e.g. page tables).
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 26	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/* This equals 0, but use constants in case they ever change */
-#define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
-/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
-#define GFP_ATOMIC	(__GFP_HIGH)
+/*
+ * GFP_ATOMIC callers cannot sleep and need the allocation to succeed.
+ * A lower watermark is applied to allow access to "atomic reserves"
+ */
+#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
@@ -119,10 +134,10 @@ struct vm_area_struct;
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_IOFS	(__GFP_IO | __GFP_FS)
-#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
-			 __GFP_NO_KSWAPD)
+#define GFP_IOFS	(__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM)
+#define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
+			 ~__GFP_KSWAPD_RECLAIM)
 
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
 }
 
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+	return !!(gfp_flags & __GFP_DIRECT_RECLAIM); /* normalise to 0/1 */
+}
+
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 24f4dfd94c51..4355129fff91 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb)
 
 static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
 {
-	might_sleep_if(pri & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(pri));
 
 	if (skb_cloned(skb))
 		return pskb_expand_head(skb, 0, 0, pri);
@@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb)
  */
 static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
 {
-	might_sleep_if(pri & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(pri));
 	if (skb_shared(skb)) {
 		struct sk_buff *nskb = skb_clone(skb, pri);
 
@@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
 static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
 					  gfp_t pri)
 {
-	might_sleep_if(pri & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(pri));
 	if (skb_cloned(skb)) {
 		struct sk_buff *nskb = skb_copy(skb, pri);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index f570e75e3da9..bbf7c2cf15b4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2041,7 +2041,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if (sk->sk_allocation & __GFP_WAIT)
+	if (gfpflags_allow_blocking(sk->sk_allocation))
 		return &current->task_frag;
 
 	return &sk->sk_frag;
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..dde6bf092c8a 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -20,7 +20,7 @@
 	{(unsigned long)GFP_ATOMIC,		"GFP_ATOMIC"},		\
 	{(unsigned long)GFP_NOIO,		"GFP_NOIO"},		\
 	{(unsigned long)__GFP_HIGH,		"GFP_HIGH"},		\
-	{(unsigned long)__GFP_WAIT,		"GFP_WAIT"},		\
+	{(unsigned long)__GFP_ATOMIC,		"GFP_ATOMIC"},		\
 	{(unsigned long)__GFP_IO,		"GFP_IO"},		\
 	{(unsigned long)__GFP_COLD,		"GFP_COLD"},		\
 	{(unsigned long)__GFP_NOWARN,		"GFP_NOWARN"},		\
@@ -36,7 +36,8 @@
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
-	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
+	{(unsigned long)__GFP_DIRECT_RECLAIM,	"GFP_DIRECT_RECLAIM"},	\
+	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"GFP_KSWAPD_RECLAIM"},	\
 	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
 	) : "GFP_NOWAIT"
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 8a056a32ded7..5ffcbd354a52 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1371,16 +1371,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (unlikely(audit_filter_type(type)))
 		return NULL;
 
-	if (gfp_mask & __GFP_WAIT) {
+	if (gfp_mask & __GFP_DIRECT_RECLAIM) {
 		if (audit_pid && audit_pid == current->pid)
-			gfp_mask &= ~__GFP_WAIT;
+			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 		else
 			reserve = 0;
 	}
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+		if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
 			long sleep_time;
 
 			sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b9d0cce3f9ce..f1603c153890 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -299,7 +299,7 @@ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 
 	idr_preload(gfp_mask);
 	spin_lock_bh(&cgroup_idr_lock);
-	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
+	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
 	spin_unlock_bh(&cgroup_idr_lock);
 	idr_preload_end();
 	return ret;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4e49cc4c9952..deae3907ac1e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2738,7 +2738,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
 		return;
 
 	/* no reclaim without waiting on it */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return;
 
 	/* this guy won't enter reclaim */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5235dd4e1e2f..3a970604308f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1779,7 +1779,7 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 	while (to_alloc-- > 0) {
 		struct page *page;
 
-		page = alloc_image_page(__GFP_HIGHMEM);
+		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
 		memory_bm_set_bit(bm, page_to_pfn(page));
 	}
 	return nr_highmem;
diff --git a/kernel/smp.c b/kernel/smp.c
index 07854477c164..d903c02223af 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,7 +669,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	cpumask_var_t cpus;
 	int cpu, ret;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_flags));
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
diff --git a/lib/idr.c b/lib/idr.c
index 5335c43adf46..6098336df267 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask)
 	 * allocation guarantee.  Disallow usage from those contexts.
 	 */
 	WARN_ON_ONCE(in_interrupt());
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	preempt_disable();
 
@@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
 	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
 	int id;
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 
 	/* sanity checks */
 	if (WARN_ON_ONCE(start < 0))
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f9ebe1c82060..fcf5d98574ce 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 	 * preloading in the interrupt anyway as all the allocations have to
 	 * be atomic. So just do normal allocation when in interrupt.
 	 */
-	if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) {
+	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
 		struct radix_tree_preload *rtp;
 
 		/*
@@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * with preemption not disabled.
  *
  * To make use of this facility, the radix tree must be initialised without
- * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
 static int __radix_tree_preload(gfp_t gfp_mask)
 {
@@ -286,12 +286,12 @@ out:
  * with preemption not disabled.
  *
  * To make use of this facility, the radix tree must be initialised without
- * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
 int radix_tree_preload(gfp_t gfp_mask)
 {
 	/* Warn on non-sensical use... */
-	WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT));
+	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
 	return __radix_tree_preload(gfp_mask);
 }
 EXPORT_SYMBOL(radix_tree_preload);
@@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload);
  */
 int radix_tree_maybe_preload(gfp_t gfp_mask)
 {
-	if (gfp_mask & __GFP_WAIT)
+	if (gfpflags_allow_blocking(gfp_mask))
 		return __radix_tree_preload(gfp_mask);
 	/* Preloading doesn't help anything with this gfp mask, skip it */
 	preempt_disable();
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 619984fc07ec..8ed2ffd963c5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 {
 	struct bdi_writeback *wb;
 
-	might_sleep_if(gfp & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
 	if (!memcg_css->parent)
 		return &bdi->wb;
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 312a716fa14c..57312b5d6e12 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 	size_t offset;
 	void *retval;
 
-	might_sleep_if(mem_flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(mem_flags));
 
 	spin_lock_irqsave(&pool->lock, flags);
 	list_for_each_entry(page, &pool->page_list, page_list) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc502e590366..05374f09339c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2046,7 +2046,7 @@ retry:
 	if (unlikely(task_in_memcg_oom(current)))
 		goto nomem;
 
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
 	mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
@@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count)
 {
 	int ret;
 
-	/* Try a single bulk charge without reclaim first */
-	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+	/* Try a single bulk charge without reclaim first, kswapd may wake */
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
diff --git a/mm/mempool.c b/mm/mempool.c
index 4c533bc51d73..004d42b1dfaf 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 	gfp_t gfp_temp;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
 	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
 	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
 	gfp_mask |= __GFP_NOWARN;	/* failures are OK */
 
-	gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
+	gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
 
 repeat_alloc:
 
@@ -349,7 +349,7 @@ repeat_alloc:
 	}
 
 	/*
-	 * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
+	 * We use gfp mask w/o direct reclaim or IO for the first round.  If
 	 * alloc failed with that and @pool was empty, retry immediately.
 	 */
 	if (gfp_temp != gfp_mask) {
@@ -358,8 +358,8 @@ repeat_alloc:
 		goto repeat_alloc;
 	}
 
-	/* We must not sleep if !__GFP_WAIT */
-	if (!(gfp_mask & __GFP_WAIT)) {
+	/* We must not sleep if !__GFP_DIRECT_RECLAIM */
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
 		spin_unlock_irqrestore(&pool->lock, flags);
 		return NULL;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 2834faba719a..e60379eb23f8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 					 (GFP_HIGHUSER_MOVABLE |
 					  __GFP_THISNODE | __GFP_NOMEMALLOC |
 					  __GFP_NORETRY | __GFP_NOWARN) &
-					 ~GFP_IOFS, 0);
+					 ~(__GFP_IO | __GFP_FS), 0);
 
 	return newpage;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 67390988881a..70461f3e3378 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -169,12 +169,12 @@ void pm_restrict_gfp_mask(void)
 	WARN_ON(!mutex_is_locked(&pm_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
-	gfp_allowed_mask &= ~GFP_IOFS;
+	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
 }
 
 bool pm_suspended_storage(void)
 {
-	if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+	if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		return false;
 	return true;
 }
@@ -2183,7 +2183,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 		return false;
 	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
 		return false;
-	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
+	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
 	return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2685,7 +2685,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		if (test_thread_flag(TIF_MEMDIE) ||
 		    (current->flags & (PF_MEMALLOC | PF_EXITING)))
 			filter &= ~SHOW_MEM_FILTER_NODES;
-	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+	if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	if (fmt) {
@@ -2945,7 +2945,6 @@ static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-	const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
 
 	/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
 	BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2954,11 +2953,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	 * The caller may dip into page reserves a bit more if the caller
 	 * cannot run direct reclaim, or if the caller has realtime scheduling
 	 * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
-	 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
+	 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
 	 */
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
-	if (atomic) {
+	if (gfp_mask & __GFP_ATOMIC) {
 		/*
 		 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
 		 * if it can't schedule.
@@ -2995,11 +2994,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
 }
 
+static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
+{
+	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
 {
-	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
 	struct page *page = NULL;
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
@@ -3019,16 +3023,24 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 	}
 
+	/*
+	 * We also sanity check to catch abuse of atomic reserves being used by
+	 * callers that are not in atomic context.
+	 */
+	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+		gfp_mask &= ~__GFP_ATOMIC;
+
 	/*
 	 * If this allocation cannot block and it is for a specific node, then
 	 * fail early.  There's no need to wakeup kswapd or retry for a
 	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
 		goto nopage;
 
 retry:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
+	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
 
 	/*
@@ -3071,8 +3083,8 @@ retry:
 		}
 	}
 
-	/* Atomic allocations - we can't balance anything */
-	if (!wait) {
+	/* Caller is not willing to reclaim, we can't balance anything */
+	if (!can_direct_reclaim) {
 		/*
 		 * All existing users of the deprecated __GFP_NOFAIL are
 		 * blockable, so warn of any new users that actually allow this
@@ -3102,7 +3114,7 @@ retry:
 		goto got_pg;
 
 	/* Checks for THP-specific high-order allocations */
-	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+	if (is_thp_gfp_mask(gfp_mask)) {
 		/*
 		 * If compaction is deferred for high-order allocations, it is
 		 * because sync compaction recently failed. If this is the case
@@ -3137,8 +3149,7 @@ retry:
 	 * fault, so use asynchronous memory compaction for THP unless it is
 	 * khugepaged trying to collapse.
 	 */
-	if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
-						(current->flags & PF_KTHREAD))
+	if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
 		migration_mode = MIGRATE_SYNC_LIGHT;
 
 	/* Try direct reclaim and then allocating */
@@ -3209,7 +3220,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	lockdep_trace_alloc(gfp_mask);
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
 	if (should_fail_alloc_page(gfp_mask, order))
 		return NULL;
diff --git a/mm/slab.c b/mm/slab.c
index 272e809404d5..a9ef77d19a9a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 }
 
 /*
- * Construct gfp mask to allocate from a specific node but do not invoke reclaim
- * or warn about failures.
+ * Construct gfp mask to allocate from a specific node but do not direct reclaim
+ * or warn about failures. kswapd may still wake to reclaim in the background.
  */
 static inline gfp_t gfp_exact_node(gfp_t flags)
 {
-	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
+	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
 }
 #endif
 
@@ -2633,7 +2633,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
 	offset *= cachep->colour_off;
 
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_enable();
 
 	/*
@@ -2663,7 +2663,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
 	cache_init_objs(cachep, page);
 
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_disable();
 	check_irq_off();
 	spin_lock(&n->list_lock);
@@ -2677,7 +2677,7 @@ static int cache_grow(struct kmem_cache *cachep,
 opps1:
 	kmem_freepages(cachep, page);
 failed:
-	if (local_flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(local_flags))
 		local_irq_disable();
 	return 0;
 }
@@ -2869,7 +2869,7 @@ force_grow:
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
 						gfp_t flags)
 {
-	might_sleep_if(flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(flags));
 #if DEBUG
 	kmem_flagcheck(cachep, flags);
 #endif
@@ -3057,11 +3057,11 @@ retry:
 		 */
 		struct page *page;
 
-		if (local_flags & __GFP_WAIT)
+		if (gfpflags_allow_blocking(local_flags))
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
 		page = kmem_getpages(cache, local_flags, numa_mem_id());
-		if (local_flags & __GFP_WAIT)
+		if (gfpflags_allow_blocking(local_flags))
 			local_irq_disable();
 		if (page) {
 			/*
diff --git a/mm/slub.c b/mm/slub.c
index 75a5fa92ac2a..97695622a858 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1265,7 +1265,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 {
 	flags &= gfp_allowed_mask;
 	lockdep_trace_alloc(flags);
-	might_sleep_if(flags & __GFP_WAIT);
+	might_sleep_if(gfpflags_allow_blocking(flags));
 
 	if (should_failslab(s->object_size, flags, s->flags))
 		return NULL;
@@ -1353,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	flags &= gfp_allowed_mask;
 
-	if (flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(flags))
 		local_irq_enable();
 
 	flags |= s->allocflags;
@@ -1363,8 +1363,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 * so we fall-back to the minimum order allocation.
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
-	if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min))
-		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT;
+	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
+		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
 
 	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
@@ -1424,7 +1424,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->frozen = 1;
 
 out:
-	if (flags & __GFP_WAIT)
+	if (gfpflags_allow_blocking(flags))
 		local_irq_disable();
 	if (!page)
 		return NULL;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9db9ef5e8481..7ee94dc10000 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1617,7 +1617,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			goto fail;
 		}
 		area->pages[i] = page;
-		if (gfp_mask & __GFP_WAIT)
+		if (gfpflags_allow_blocking(gfp_mask))
 			cond_resched();
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0cd7eed4e38..2aec4241b42a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file,
 	 * won't get blocked by normal direct-reclaimers, forming a circular
 	 * deadlock.
 	 */
-	if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
+	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		inactive >>= 3;
 
 	return isolated > inactive;
@@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
-	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
+	if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
 		return ZONE_RECLAIM_NOSCAN;
 
 	/*
diff --git a/mm/zswap.c b/mm/zswap.c
index 4043df7c672f..e54166d3732e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -571,7 +571,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
 	struct zswap_pool *pool;
-	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	if (!pool) {
@@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	/* store */
 	len = dlen + sizeof(struct zswap_header);
 	ret = zpool_malloc(entry->pool->zpool, len,
-			   __GFP_NORETRY | __GFP_NOWARN, &handle);
+			   __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
+			   &handle);
 	if (ret == -ENOSPC) {
 		zswap_reject_compress_poor++;
 		goto put_dstmem;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..aa41e6dd6429 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	len += NET_SKB_PAD;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
 	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
-	    (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
 		if (!skb)
 			goto skb_fail;
@@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 		return NULL;
 
 	gfp_head = gfp_mask;
-	if (gfp_head & __GFP_WAIT)
+	if (gfp_head & __GFP_DIRECT_RECLAIM)
 		gfp_head |= __GFP_REPEAT;
 
 	*errcode = -ENOBUFS;
@@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 		while (order) {
 			if (npages >= 1 << order) {
-				page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
+				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
 						   __GFP_COMP |
 						   __GFP_NOWARN |
 						   __GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 7529eb9463be..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1944,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
 
 	pfrag->offset = 0;
 	if (SKB_FRAG_PAGE_ORDER) {
-		pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
-					  __GFP_NOWARN | __GFP_NORETRY,
+		/* Avoid direct reclaim but allow kswapd to wake */
+		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+					  __GFP_COMP | __GFP_NOWARN |
+					  __GFP_NORETRY,
 					  SKB_FRAG_PAGE_ORDER);
 		if (likely(pfrag->page)) {
 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fafe33bdb619..59651af8cc27 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 	consume_skb(info.skb2);
 
 	if (info.delivered) {
-		if (info.congested && (allocation & __GFP_WAIT))
+		if (info.congested && gfpflags_allow_blocking(allocation))
 			yield();
 		return 0;
 	}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 96744b75db93..977fb86065b7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	gfp_t slab_mask = GFP_NOWAIT;
 	gfp_t page_mask = GFP_NOWAIT;
 
-	if (gfp & __GFP_WAIT) {
+	if (gfp & __GFP_DIRECT_RECLAIM) {
 		slab_mask = GFP_KERNEL;
 		page_mask = GFP_HIGHUSER;
 	}
@@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 	struct ib_recv_wr *failed_wr;
 	unsigned int posted = 0;
 	int ret = 0;
-	bool can_wait = !!(gfp & __GFP_WAIT);
+	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
 	u32 pos;
 
 	/* the goal here is to just make sure that someone, somewhere
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 692b3e67fb54..6c71ed1caf16 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 		if (bundle->num_conns >= 20) {
 			_debug("too many conns");
 
-			if (!(gfp & __GFP_WAIT)) {
+			if (!gfpflags_allow_blocking(gfp)) {
 				_leave(" = -EAGAIN");
 				return -EAGAIN;
 			}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b00f1f9611d6..559afd0ee7de 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
 /* Set an association id for a given association */
 int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 {
-	bool preload = !!(gfp & __GFP_WAIT);
+	bool preload = gfpflags_allow_blocking(gfp);
 	int ret;
 
 	/* If the id is already assigned, keep it. */
-- 
cgit v1.2.3


From 40113370836e8e79befa585277296ed42781ef31 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:25 -0800
Subject: mm: page_alloc: remove GFP_IOFS

GFP_IOFS was intended to be shorthand for clearing two flags, not a set of
allocation flags.  There is only one user of this flag combination now and
there appears to be no reason why Lustre had to be protected from reclaim
stalls.  As none of the sites appears to be atomic, this patch simply
deletes GFP_IOFS and converts Lustre to using GFP_KERNEL or GFP_NOFS as
appropriate.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Oleg Drokin <oleg.drokin@intel.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/lustre/lnet/lnet/router.c           | 2 +-
 drivers/staging/lustre/lnet/selftest/conrpc.c       | 2 +-
 drivers/staging/lustre/lnet/selftest/rpc.c          | 2 +-
 drivers/staging/lustre/lustre/libcfs/module.c       | 2 +-
 drivers/staging/lustre/lustre/libcfs/tracefile.c    | 2 +-
 drivers/staging/lustre/lustre/llite/remote_perm.c   | 2 +-
 drivers/staging/lustre/lustre/mgc/mgc_request.c     | 8 ++++----
 drivers/staging/lustre/lustre/obdecho/echo_client.c | 2 +-
 drivers/staging/lustre/lustre/osc/osc_cache.c       | 2 +-
 include/linux/gfp.h                                 | 1 -
 10 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index fe49f1b87652..4ea651c6db3a 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1245,7 +1245,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
 	for (i = 0; i < npages; i++) {
 		page = alloc_pages_node(
 				cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				__GFP_ZERO | GFP_IOFS, 0);
+				GFP_KERNEL | __GFP_ZERO, 0);
 		if (page == NULL) {
 			while (--i >= 0)
 				__free_page(rb->rb_kiov[i].kiov_page);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 0060ff64f88e..64a0335934f3 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -860,7 +860,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
 			bulk->bk_iovs[i].kiov_offset = 0;
 			bulk->bk_iovs[i].kiov_len    = len;
 			bulk->bk_iovs[i].kiov_page   =
-				alloc_page(GFP_IOFS);
+				alloc_page(GFP_KERNEL);
 
 			if (bulk->bk_iovs[i].kiov_page == NULL) {
 				lstcon_rpc_put(*crpc);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 162f9d330496..7005002c15da 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -146,7 +146,7 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
 		int nob;
 
 		pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
-				      GFP_IOFS, 0);
+				      GFP_KERNEL, 0);
 		if (pg == NULL) {
 			CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
 			srpc_free_bulk(bk);
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
index 50e8fd23fa17..07a68594c279 100644
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -319,7 +319,7 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a
 	struct libcfs_ioctl_data *data;
 	int err = 0;
 
-	LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS);
+	LIBCFS_ALLOC_GFP(buf, 1024, GFP_KERNEL);
 	if (buf == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c
index 973c7c209dfc..f2d018d7823c 100644
--- a/drivers/staging/lustre/lustre/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lustre/libcfs/tracefile.c
@@ -810,7 +810,7 @@ int cfs_trace_allocate_string_buffer(char **str, int nob)
 	if (nob > 2 * PAGE_CACHE_SIZE)	    /* string must be "sensible" */
 		return -EINVAL;
 
-	*str = kmalloc(nob, GFP_IOFS | __GFP_ZERO);
+	*str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
 	if (*str == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
index c902133dfc97..fe4a72268e3a 100644
--- a/drivers/staging/lustre/lustre/llite/remote_perm.c
+++ b/drivers/staging/lustre/lustre/llite/remote_perm.c
@@ -82,7 +82,7 @@ static struct hlist_head *alloc_rmtperm_hash(void)
 	struct hlist_head *hash;
 	int i;
 
-	hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_IOFS | __GFP_ZERO);
+	hash = kmem_cache_alloc(ll_rmtperm_hash_cachep, GFP_NOFS | __GFP_ZERO);
 	if (!hash)
 		return NULL;
 
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index b81efcd997ae..5f53f3b7ceff 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -1112,7 +1112,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
 	LASSERT(cfg->cfg_instance != NULL);
 	LASSERT(cfg->cfg_sb == cfg->cfg_instance);
 
-	inst = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+	inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
 	if (!inst)
 		return -ENOMEM;
 
@@ -1308,14 +1308,14 @@ static int mgc_process_recover_log(struct obd_device *obd,
 	if (cfg->cfg_last_idx == 0) /* the first time */
 		nrpages = CONFIG_READ_NRPAGES_INIT;
 
-	pages = kcalloc(nrpages, sizeof(*pages), GFP_NOFS);
+	pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL);
 	if (pages == NULL) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
 	for (i = 0; i < nrpages; i++) {
-		pages[i] = alloc_page(GFP_IOFS);
+		pages[i] = alloc_page(GFP_KERNEL);
 		if (pages[i] == NULL) {
 			rc = -ENOMEM;
 			goto out;
@@ -1466,7 +1466,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc,
 	if (cld->cld_cfg.cfg_sb)
 		lsi = s2lsi(cld->cld_cfg.cfg_sb);
 
-	env = kzalloc(sizeof(*env), GFP_NOFS);
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
 	if (!env)
 		return -ENOMEM;
 
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index b6f000bb8c82..f61ef669644c 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -1562,7 +1562,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
 		  (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
 		  (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
 
-	gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER;
+	gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
 
 	LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
 	LASSERT(lsm != NULL);
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index cfb83bcfcb17..b1d1a87f05e3 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -346,7 +346,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
 {
 	struct osc_extent *ext;
 
-	ext = kmem_cache_alloc(osc_extent_kmem, GFP_IOFS | __GFP_ZERO);
+	ext = kmem_cache_alloc(osc_extent_kmem, GFP_NOFS | __GFP_ZERO);
 	if (ext == NULL)
 		return NULL;
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b56e811b6f7c..86f9f7da86ea 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -134,7 +134,6 @@ struct vm_area_struct;
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_IOFS	(__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM)
 #define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
 			 ~__GFP_KSWAPD_RECLAIM)
-- 
cgit v1.2.3


From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:28 -0800
Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM

Clearing __GFP_WAIT was used to signal that the caller was in atomic
context and could not sleep.  Now it is possible to distinguish between
true atomic context and callers that are not willing to sleep.  The latter
should clear __GFP_DIRECT_RECLAIM so kswapd will still wake.  As clearing
__GFP_WAIT behaves differently, there is a risk that people will clear the
wrong flags.  This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly
indicate what it does -- setting it allows all reclaim activity, clearing
it prevents all of it.

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/blk-core.c                            |  4 ++--
 block/blk-mq.c                              |  2 +-
 block/scsi_ioctl.c                          |  6 +++---
 drivers/block/drbd/drbd_bitmap.c            |  2 +-
 drivers/block/mtip32xx/mtip32xx.c           |  2 +-
 drivers/block/paride/pd.c                   |  2 +-
 drivers/block/pktcdvd.c                     |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c             |  2 +-
 drivers/ide/ide-atapi.c                     |  2 +-
 drivers/ide/ide-cd.c                        |  2 +-
 drivers/ide/ide-cd_ioctl.c                  |  2 +-
 drivers/ide/ide-devsets.c                   |  2 +-
 drivers/ide/ide-disk.c                      |  2 +-
 drivers/ide/ide-ioctls.c                    |  4 ++--
 drivers/ide/ide-park.c                      |  2 +-
 drivers/ide/ide-pm.c                        |  4 ++--
 drivers/ide/ide-tape.c                      |  4 ++--
 drivers/ide/ide-taskfile.c                  |  4 ++--
 drivers/infiniband/hw/qib/qib_init.c        |  2 +-
 drivers/misc/vmw_balloon.c                  |  2 +-
 drivers/nvme/host/pci.c                     |  6 ++++--
 drivers/scsi/scsi_error.c                   |  2 +-
 drivers/scsi/scsi_lib.c                     |  4 ++--
 drivers/staging/rdma/hfi1/init.c            |  2 +-
 drivers/staging/rdma/ipath/ipath_file_ops.c |  2 +-
 fs/cachefiles/internal.h                    |  2 +-
 fs/direct-io.c                              |  2 +-
 fs/nilfs2/mdt.h                             |  2 +-
 include/linux/gfp.h                         | 16 ++++++++--------
 kernel/power/swap.c                         | 16 ++++++++--------
 lib/percpu_ida.c                            |  2 +-
 mm/failslab.c                               |  8 ++++----
 mm/filemap.c                                |  2 +-
 mm/huge_memory.c                            |  2 +-
 mm/memcontrol.c                             |  2 +-
 mm/migrate.c                                |  2 +-
 mm/page_alloc.c                             |  9 +++++----
 security/integrity/ima/ima_crypto.c         |  2 +-
 38 files changed, 71 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 9e32f0868e36..590cca21c24a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
 		if (percpu_ref_tryget_live(&q->q_usage_counter))
 			return 0;
 
-		if (!(gfp & __GFP_WAIT))
+		if (!gfpflags_allow_blocking(gfp))
 			return -EBUSY;
 
 		ret = wait_event_interruptible(q->mq_freeze_wq,
@@ -2038,7 +2038,7 @@ void generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) {
+		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
 
 			q->make_request_fn(q, bio);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 68c0a3416b34..694f8703f83c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 		ctx = blk_mq_get_ctx(q);
 		hctx = q->mq_ops->map_queue(q, ctx->cpu);
 		blk_mq_set_alloc_data(&alloc_data, q,
-				__GFP_WAIT|__GFP_HIGH, false, ctx, hctx);
+				__GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
 		rq = __blk_mq_alloc_request(&alloc_data, rw);
 		ctx = alloc_data.ctx;
 		hctx = alloc_data.hctx;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index dda653ce7b24..0774799942e0 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -444,7 +444,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	}
 
-	rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
+	rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto error_free_buffer;
@@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		break;
 	}
 
-	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) {
+	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
 		err = DRIVER_ERROR << 24;
 		goto error;
 	}
@@ -536,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	struct request *rq;
 	int err;
 
-	rq = blk_get_request(q, WRITE, __GFP_WAIT);
+	rq = blk_get_request(q, WRITE, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	blk_rq_set_block_pc(rq);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e5e0f19ceda0..3dc53a16ed3a 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1007,7 +1007,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
+		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
 		copy_highpage(page, b->bm_pages[page_nr]);
 		bm_store_page_idx(page, page_nr);
 	} else
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index f504232c1ee7..a28a562f7b7f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 {
 	struct request *rq;
 
-	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true);
+	rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
 	return blk_mq_rq_to_pdu(rq);
 }
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index b9242d78283d..562b5a4ca7b7 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -723,7 +723,7 @@ static int pd_special_command(struct pd_unit *disk,
 	struct request *rq;
 	int err = 0;
 
-	rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 7be2375db7f2..5959c2981cc7 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,14 +704,14 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	int ret = 0;
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
-			     WRITE : READ, __GFP_WAIT);
+			     WRITE : READ, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	blk_rq_set_block_pc(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
-				      __GFP_WAIT);
+				      __GFP_RECLAIM);
 		if (ret)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d58cb9e034fe..7e505d4be7c0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2216,7 +2216,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	mapping = file_inode(obj->base.filp)->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
-	gfp &= ~(__GFP_IO | __GFP_WAIT);
+	gfp &= ~(__GFP_IO | __GFP_RECLAIM);
 	sg = st->sgl;
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 1362ad80a76c..05352f490d60 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	struct request *rq;
 	int error;
 
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->special = (char *)pc;
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 64a6b827b3dd..ef907fd5ba98 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -441,7 +441,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		struct request *rq;
 		int error;
 
-		rq = blk_get_request(drive->queue, write, __GFP_WAIT);
+		rq = blk_get_request(drive->queue, write, __GFP_RECLAIM);
 
 		memcpy(rq->cmd, cmd, BLK_MAX_CDB);
 		rq->cmd_type = REQ_TYPE_ATA_PC;
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 066e39036518..474173eb31bb 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -303,7 +303,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	struct request *rq;
 	int ret;
 
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->cmd_flags = REQ_QUIET;
 	ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index b05a74d78ef5..0dd43b4fcec6 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -165,7 +165,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 	if (!(setting->flags & DS_SYNC))
 		return setting->set(drive, arg);
 
-	rq = blk_get_request(q, READ, __GFP_WAIT);
+	rq = blk_get_request(q, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->cmd_len = 5;
 	rq->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 56b9708894a5..37a8a907febe 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -477,7 +477,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index aa2e9b77b20d..d05db2469209 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 	if (NULL == (void *) arg) {
 		struct request *rq;
 
-		rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+		rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 		rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 		err = blk_execute_rq(drive->queue, NULL, rq, 0);
 		blk_put_request(rq);
@@ -221,7 +221,7 @@ static int generic_drive_reset(ide_drive_t *drive)
 	struct request *rq;
 	int ret = 0;
 
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->cmd_len = 1;
 	rq->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index c80868520488..2d7dca56dd24 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	}
 	spin_unlock_irq(&hwif->lock);
 
-	rq = blk_get_request(q, READ, __GFP_WAIT);
+	rq = blk_get_request(q, READ, __GFP_RECLAIM);
 	rq->cmd[0] = REQ_PARK_HEADS;
 	rq->cmd_len = 1;
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 081e43458d50..e34af488693a 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -18,7 +18,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -88,7 +88,7 @@ int generic_ide_resume(struct device *dev)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
 	rq->cmd_flags |= REQ_PREEMPT;
 	rq->special = &rqpm;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index f5d51d1d09ee..12fa04997dcc 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -852,7 +852,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 	BUG_ON(size < 0 || size % tape->blk_size);
 
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
@@ -860,7 +860,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 
 	if (size) {
 		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-				      __GFP_WAIT);
+				      __GFP_RECLAIM);
 		if (ret)
 			goto out_put;
 	}
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 0979e126fff1..a716693417a3 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -430,7 +430,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	int error;
 	int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE;
 
-	rq = blk_get_request(drive->queue, rw, __GFP_WAIT);
+	rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	/*
@@ -441,7 +441,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	 */
 	if (nsect) {
 		error = blk_rq_map_kern(drive->queue, rq, buf,
-					nsect * SECTOR_SIZE, __GFP_WAIT);
+					nsect * SECTOR_SIZE, __GFP_RECLAIM);
 		if (error)
 			goto put_req;
 	}
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 7e00470adc30..4ff340fe904f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
 	 * heavy filesystem activity makes these fail, and we can
 	 * use compound pages.
 	 */
-	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
 
 	egrcnt = rcd->rcvegrcnt;
 	egroff = rcd->rcvegr_tid_base;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 89300870fefb..1e688bfec567 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
 
 /*
  * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
- * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
+ * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
  * __GFP_NOWARN, to suppress page allocation failure warnings.
  */
 #define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e878590e71b6..6c195554d94a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1025,11 +1025,13 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	req->special = (void *)0;
 
 	if (buffer && bufflen) {
-		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+		ret = blk_rq_map_kern(q, req, buffer, bufflen,
+				      __GFP_DIRECT_RECLAIM);
 		if (ret)
 			goto out;
 	} else if (ubuffer && bufflen) {
-		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+				      __GFP_DIRECT_RECLAIM);
 		if (ret)
 			goto out;
 		bio = req->bio;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 66a96cd98b97..984ddcb4786d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1970,7 +1970,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	struct request *req;
 
 	/*
-	 * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a
+	 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
 	 * request becomes available
 	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 126a48c6431e..dd8ad2a44510 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -222,13 +222,13 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	int write = (data_direction == DMA_TO_DEVICE);
 	int ret = DRIVER_ERROR << 24;
 
-	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
+	req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM);
 	if (IS_ERR(req))
 		return ret;
 	blk_rq_set_block_pc(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
-					buffer, bufflen, __GFP_WAIT))
+					buffer, bufflen, __GFP_RECLAIM))
 		goto out;
 
 	req->cmd_len = COMMAND_SIZE(cmd[0]);
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 47a1202fcbdf..8666f3ad24e9 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1560,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 	 * heavy filesystem activity makes these fail, and we can
 	 * use compound pages.
 	 */
-	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
 
 	/*
 	 * The minimum size of the eager buffers is a groups of MTU-sized
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
index 5d9b9dbd8fc4..13c3cd11ab92 100644
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
@@ -905,7 +905,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
 	 * heavy filesystem activity makes these fail, and we can
 	 * use compound pages.
 	 */
-	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
 
 	egrcnt = dd->ipath_rcvegrcnt;
 	/* TID number offset for this port */
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index aecd0859eacb..9c4b737a54df 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -30,7 +30,7 @@ extern unsigned cachefiles_debug;
 #define CACHEFILES_DEBUG_KLEAVE	2
 #define CACHEFILES_DEBUG_KDEBUG	4
 
-#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC)
+#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC)
 
 /*
  * node records
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3ae0e0427191..18e7554cf94c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -361,7 +361,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 
 	/*
 	 * bio_alloc() is guaranteed to return a bio when called with
-	 * __GFP_WAIT and we request a valid number of vectors.
+	 * __GFP_RECLAIM and we request a valid number of vectors.
 	 */
 	bio = bio_alloc(GFP_KERNEL, nr_vecs);
 
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index fe529a87a208..03246cac3338 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
 }
 
 /* Default GFP flags using highmem */
-#define NILFS_MDT_GFP      (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
+#define NILFS_MDT_GFP      (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM)
 
 int nilfs_mdt_get_block(struct inode *, unsigned long, int,
 			void (*init_block)(struct inode *,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 86f9f7da86ea..369227202ac2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -107,7 +107,7 @@ struct vm_area_struct;
  * can be cleared when the reclaiming of pages would cause unnecessary
  * disruption.
  */
-#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
 #define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
 
@@ -126,12 +126,12 @@ struct vm_area_struct;
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
-#define GFP_NOIO	(__GFP_WAIT)
-#define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
-#define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
-#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
+#define GFP_NOIO	(__GFP_RECLAIM)
+#define GFP_NOFS	(__GFP_RECLAIM | __GFP_IO)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_TEMPORARY	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
 			 __GFP_RECLAIMABLE)
-#define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_USER	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
 #define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
@@ -143,12 +143,12 @@ struct vm_area_struct;
 #define GFP_MOVABLE_SHIFT 3
 
 /* Control page allocator reclaim behavior */
-#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
+#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
 
 /* Control slab gfp mask during early boot */
-#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
+#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
 
 /* Control allocation constraints */
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b2066fb5b10f..12cd989dadf6 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
 	struct bio *bio;
 	int error = 0;
 
-	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+	bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
 	bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio->bi_bdev = hib_resume_bdev;
 
@@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
 		return -ENOSPC;
 
 	if (hb) {
-		src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
+		src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
 		                              __GFP_NORETRY);
 		if (src) {
 			copy_page(src, buf);
@@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
 			ret = hib_wait_io(hb); /* Free pages */
 			if (ret)
 				return ret;
-			src = (void *)__get_free_page(__GFP_WAIT |
+			src = (void *)__get_free_page(__GFP_RECLAIM |
 			                              __GFP_NOWARN |
 			                              __GFP_NORETRY);
 			if (src) {
@@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
 	nr_threads = num_online_cpus() - 1;
 	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
 
-	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+	page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
 	if (!page) {
 		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
 		ret = -ENOMEM;
@@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
 		last = tmp;
 
 		tmp->map = (struct swap_map_page *)
-		           __get_free_page(__GFP_WAIT | __GFP_HIGH);
+			   __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
 		if (!tmp->map) {
 			release_swap_reader(handle);
 			return -ENOMEM;
@@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle,
 
 	for (i = 0; i < read_pages; i++) {
 		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
-		                                  __GFP_WAIT | __GFP_HIGH :
-		                                  __GFP_WAIT | __GFP_NOWARN |
-		                                  __GFP_NORETRY);
+						  __GFP_RECLAIM | __GFP_HIGH :
+						  __GFP_RECLAIM | __GFP_NOWARN |
+						  __GFP_NORETRY);
 
 		if (!page[i]) {
 			if (i < LZO_CMP_PAGES) {
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index f75715131f20..6d40944960de 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -135,7 +135,7 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
  * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
  *
  * @gfp indicates whether or not to wait until a free id is available (it's not
- * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep
+ * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
  * however long it takes until another thread frees an id (same semantics as a
  * mempool).
  *
diff --git a/mm/failslab.c b/mm/failslab.c
index 98fb490311eb..79171b4a5826 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -3,11 +3,11 @@
 
 static struct {
 	struct fault_attr attr;
-	bool ignore_gfp_wait;
+	bool ignore_gfp_reclaim;
 	bool cache_filter;
 } failslab = {
 	.attr = FAULT_ATTR_INITIALIZER,
-	.ignore_gfp_wait = true,
+	.ignore_gfp_reclaim = true,
 	.cache_filter = false,
 };
 
@@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
 	if (gfpflags & __GFP_NOFAIL)
 		return false;
 
-        if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
+	if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM))
 		return false;
 
 	if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
@@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void)
 		return PTR_ERR(dir);
 
 	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				&failslab.ignore_gfp_wait))
+				&failslab.ignore_gfp_reclaim))
 		goto fail;
 	if (!debugfs_create_bool("cache-filter", mode, dir,
 				&failslab.cache_filter))
diff --git a/mm/filemap.c b/mm/filemap.c
index 58e04e26f996..6ef3674c0763 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter);
  * page is known to the local caching routines.
  *
  * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
+ * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
  *
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f5c08b46fef8..9812d4618651 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 
 static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
 {
-	return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
+	return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp;
 }
 
 /* Caller must hold page table lock. */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 05374f09339c..a5470674a477 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2120,7 +2120,7 @@ done_restock:
 	/*
 	 * If the hierarchy is above the normal consumption range, schedule
 	 * reclaim on returning to userland.  We can perform reclaim here
-	 * if __GFP_WAIT but let's always punt for simplicity and so that
+	 * if __GFP_RECLAIM but let's always punt for simplicity and so that
 	 * GFP_KERNEL can consistently be used during reclaim.  @memcg is
 	 * not recorded as it most likely matches current's and won't
 	 * change in the meantime.  As high limit is checked again before
diff --git a/mm/migrate.c b/mm/migrate.c
index e60379eb23f8..7890d0bb5e23 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_dropref;
 
 	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
 		HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 70461f3e3378..1b373096b990 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2160,11 +2160,11 @@ static struct {
 	struct fault_attr attr;
 
 	bool ignore_gfp_highmem;
-	bool ignore_gfp_wait;
+	bool ignore_gfp_reclaim;
 	u32 min_order;
 } fail_page_alloc = {
 	.attr = FAULT_ATTR_INITIALIZER,
-	.ignore_gfp_wait = true,
+	.ignore_gfp_reclaim = true,
 	.ignore_gfp_highmem = true,
 	.min_order = 1,
 };
@@ -2183,7 +2183,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 		return false;
 	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
 		return false;
-	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM))
+	if (fail_page_alloc.ignore_gfp_reclaim &&
+			(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
 	return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2202,7 +2203,7 @@ static int __init fail_page_alloc_debugfs(void)
 		return PTR_ERR(dir);
 
 	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				&fail_page_alloc.ignore_gfp_wait))
+				&fail_page_alloc.ignore_gfp_reclaim))
 		goto fail;
 	if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
 				&fail_page_alloc.ignore_gfp_highmem))
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index e24121afb2f2..6eb62936c672 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -126,7 +126,7 @@ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size,
 {
 	void *ptr;
 	int order = ima_maxorder;
-	gfp_t gfp_mask = __GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY;
+	gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY;
 
 	if (order)
 		order = min(get_order(max_size), order);
-- 
cgit v1.2.3


From f77cf4e4cc9d40310a7224a1a67c733aeec78836 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:31 -0800
Subject: mm, page_alloc: delete the zonelist_cache

The zonelist cache (zlc) was introduced to skip over zones that were
recently known to be full.  This avoided expensive operations such as the
cpuset checks, watermark calculations and zone_reclaim.  The situation
today is different and the complexity of zlc is harder to justify.

1) The cpuset checks are no-ops unless a cpuset is active and in general
   are a lot cheaper.

2) zone_reclaim is now disabled by default and I suspect that was a large
   source of the cost that zlc wanted to avoid. When it is enabled, it's
   known to be a major source of stalling when nodes fill up and it's
   unwise to hit every other user with the overhead.

3) Watermark checks are expensive to calculate for high-order
   allocation requests. Later patches in this series will reduce the cost
   of the watermark checking.

4) The most important issue is that in the current implementation it
   is possible for a failed THP allocation to mark a zone full for order-0
   allocations and cause a fallback to remote nodes.

The last issue could be addressed with additional complexity but as the
benefit of zlc is questionable, it is better to remove it.  If stalls due
to zone_reclaim are ever reported then an alternative would be to
introduce deferring logic based on a timeout inside zone_reclaim itself
and leave the page allocator fast paths alone.

The impact on page-allocator microbenchmarks is negligible as they don't
hit the paths where the zlc comes into play.  Most page-reclaim related
workloads showed no noticeable difference as a result of the removal.

The impact was noticeable in a workload called "stutter".  One part uses a
lot of anonymous memory, a second measures mmap latency and a third copies
a large file.  In an ideal world the latency application would not notice
the mmap latency.  On a 2-node machine the results of this patch are

stutter
                             4.3.0-rc1             4.3.0-rc1
                              baseline              nozlc-v4
Min         mmap     20.9243 (  0.00%)     20.7716 (  0.73%)
1st-qrtle   mmap     22.0612 (  0.00%)     22.0680 ( -0.03%)
2nd-qrtle   mmap     22.3291 (  0.00%)     22.3809 ( -0.23%)
3rd-qrtle   mmap     25.2244 (  0.00%)     25.2396 ( -0.06%)
Max-90%     mmap     48.0995 (  0.00%)     28.3713 ( 41.02%)
Max-93%     mmap     52.5557 (  0.00%)     36.0170 ( 31.47%)
Max-95%     mmap     55.8173 (  0.00%)     47.3163 ( 15.23%)
Max-99%     mmap     67.3781 (  0.00%)     70.1140 ( -4.06%)
Max         mmap  24447.6375 (  0.00%)  12915.1356 ( 47.17%)
Mean        mmap     33.7883 (  0.00%)     27.7944 ( 17.74%)
Best99%Mean mmap     27.7825 (  0.00%)     25.2767 (  9.02%)
Best95%Mean mmap     26.3912 (  0.00%)     23.7994 (  9.82%)
Best90%Mean mmap     24.9886 (  0.00%)     23.2251 (  7.06%)
Best50%Mean mmap     22.0157 (  0.00%)     22.0261 ( -0.05%)
Best10%Mean mmap     21.6705 (  0.00%)     21.6083 (  0.29%)
Best5%Mean  mmap     21.5581 (  0.00%)     21.4611 (  0.45%)
Best1%Mean  mmap     21.3079 (  0.00%)     21.1631 (  0.68%)

Note that the maximum stall latency went from 24 seconds to 12 which is
still bad but an improvement.  The mileage varies considerably: a 2-node
machine on an earlier test went from 494 seconds to 47 seconds and a
4-node machine that tested an earlier version of this patch went from a
worst case stall time of 6 seconds to 67ms.  The nature of the benchmark
is inherently unpredictable as it is hammering the system and the mileage
will vary between machines.

There is a secondary impact with potentially more direct reclaim because
zones are now being considered instead of being skipped by zlc.  In this
particular test run it did not occur so will not be described.  However,
in at least one test the following was observed

1. Direct reclaim rates were higher. This was likely due to direct reclaim
  being entered instead of the zlc disabling a zone and busy looping.
  Busy looping may have the effect of allowing kswapd to make more
  progress and in some cases may be better overall. If this is found then
  the correct action is to put direct reclaimers to sleep on a waitqueue
  and allow kswapd to make forward progress. Busy looping on the zlc is even
  worse than when the allocator used to blindly call congestion_wait().

2. There was higher swap activity as direct reclaim was active.

3. Direct reclaim efficiency was lower. This is related to 1 as more
  scanning activity also encountered more pages that could not be
  immediately reclaimed.

In that case, the direct page scan and reclaim rates are noticeable but
it is not considered a problem for a few reasons

1. The test is primarily concerned with latency. The mmap attempts are also
   faulted which means there are THP allocation requests. The ZLC could
   cause zones to be disabled causing the process to busy loop instead
   of reclaiming.  This looks like elevated direct reclaim activity but
   it's the correct action to take based on what processes requested.

2. The test hammers reclaim and compaction heavily. The number of successful
   THP faults is highly variable but affects the reclaim stats. It's not a
   realistic or reasonable measure of page reclaim activity.

3. No other page-reclaim intensive workload that was tested showed a problem.

4. If a workload is identified that benefitted from the busy looping then it
   should be fixed by having direct reclaimers sleep on a wait queue until
   woken by kswapd instead of busy looping. We had this class of problem before
   when congestion_waits() with a fixed timeout was a brain damaged decision
   but happened to benefit some workloads.

If a workload is identified that relied on the zlc to busy loop then it
should be fixed correctly and have a direct reclaimer sleep on a waitqueue
until woken by kswapd.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  74 -----------------
 mm/page_alloc.c        | 212 -------------------------------------------------
 2 files changed, 286 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 38bed71758ab..1e88aae329ff 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -589,75 +589,8 @@ static inline bool zone_is_empty(struct zone *zone)
  * [1]	: No fallback (__GFP_THISNODE)
  */
 #define MAX_ZONELISTS 2
-
-
-/*
- * We cache key information from each zonelist for smaller cache
- * footprint when scanning for free pages in get_page_from_freelist().
- *
- * 1) The BITMAP fullzones tracks which zones in a zonelist have come
- *    up short of free memory since the last time (last_fullzone_zap)
- *    we zero'd fullzones.
- * 2) The array z_to_n[] maps each zone in the zonelist to its node
- *    id, so that we can efficiently evaluate whether that node is
- *    set in the current tasks mems_allowed.
- *
- * Both fullzones and z_to_n[] are one-to-one with the zonelist,
- * indexed by a zones offset in the zonelist zones[] array.
- *
- * The get_page_from_freelist() routine does two scans.  During the
- * first scan, we skip zones whose corresponding bit in 'fullzones'
- * is set or whose corresponding node in current->mems_allowed (which
- * comes from cpusets) is not set.  During the second scan, we bypass
- * this zonelist_cache, to ensure we look methodically at each zone.
- *
- * Once per second, we zero out (zap) fullzones, forcing us to
- * reconsider nodes that might have regained more free memory.
- * The field last_full_zap is the time we last zapped fullzones.
- *
- * This mechanism reduces the amount of time we waste repeatedly
- * reexaming zones for free memory when they just came up low on
- * memory momentarilly ago.
- *
- * The zonelist_cache struct members logically belong in struct
- * zonelist.  However, the mempolicy zonelists constructed for
- * MPOL_BIND are intentionally variable length (and usually much
- * shorter).  A general purpose mechanism for handling structs with
- * multiple variable length members is more mechanism than we want
- * here.  We resort to some special case hackery instead.
- *
- * The MPOL_BIND zonelists don't need this zonelist_cache (in good
- * part because they are shorter), so we put the fixed length stuff
- * at the front of the zonelist struct, ending in a variable length
- * zones[], as is needed by MPOL_BIND.
- *
- * Then we put the optional zonelist cache on the end of the zonelist
- * struct.  This optional stuff is found by a 'zlcache_ptr' pointer in
- * the fixed length portion at the front of the struct.  This pointer
- * both enables us to find the zonelist cache, and in the case of
- * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
- * to know that the zonelist cache is not there.
- *
- * The end result is that struct zonelists come in two flavors:
- *  1) The full, fixed length version, shown below, and
- *  2) The custom zonelists for MPOL_BIND.
- * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
- *
- * Even though there may be multiple CPU cores on a node modifying
- * fullzones or last_full_zap in the same zonelist_cache at the same
- * time, we don't lock it.  This is just hint data - if it is wrong now
- * and then, the allocator will still function, perhaps a bit slower.
- */
-
-
-struct zonelist_cache {
-	unsigned short z_to_n[MAX_ZONES_PER_ZONELIST];		/* zone->nid */
-	DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST);	/* zone full? */
-	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
-};
 #else
 #define MAX_ZONELISTS 1
-struct zonelist_cache;
 #endif
 
 /*
@@ -675,9 +608,6 @@ struct zoneref {
  * allocation, the other zones are fallback zones, in decreasing
  * priority.
  *
- * If zlcache_ptr is not NULL, then it is just the address of zlcache,
- * as explained above.  If zlcache_ptr is NULL, there is no zlcache.
- * *
  * To speed the reading of the zonelist, the zonerefs contain the zone index
  * of the entry being read. Helper functions to access information given
  * a struct zoneref are
@@ -687,11 +617,7 @@ struct zoneref {
  * zonelist_node_idx()	- Return the index of the node for an entry
  */
 struct zonelist {
-	struct zonelist_cache *zlcache_ptr;		     // NULL or &zlcache
 	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
-#ifdef CONFIG_NUMA
-	struct zonelist_cache zlcache;			     // optional ...
-#endif
 };
 
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1b373096b990..8dc6e3cd40f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2292,122 +2292,6 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-/*
- * zlc_setup - Setup for "zonelist cache".  Uses cached zone data to
- * skip over zones that are not allowed by the cpuset, or that have
- * been recently (in last second) found to be nearly full.  See further
- * comments in mmzone.h.  Reduces cache footprint of zonelist scans
- * that have to skip over a lot of full or unallowed zones.
- *
- * If the zonelist cache is present in the passed zonelist, then
- * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_MEMORY].)
- *
- * If the zonelist cache is not available for this zonelist, does
- * nothing and returns NULL.
- *
- * If the fullzones BITMAP in the zonelist cache is stale (more than
- * a second since last zap'd) then we zap it out (clear its bits.)
- *
- * We hold off even calling zlc_setup, until after we've checked the
- * first zone in the zonelist, on the theory that most allocations will
- * be satisfied from that first zone, so best to examine that zone as
- * quickly as we can.
- */
-static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
-{
-	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
-	nodemask_t *allowednodes;	/* zonelist_cache approximation */
-
-	zlc = zonelist->zlcache_ptr;
-	if (!zlc)
-		return NULL;
-
-	if (time_after(jiffies, zlc->last_full_zap + HZ)) {
-		bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-		zlc->last_full_zap = jiffies;
-	}
-
-	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
-					&cpuset_current_mems_allowed :
-					&node_states[N_MEMORY];
-	return allowednodes;
-}
-
-/*
- * Given 'z' scanning a zonelist, run a couple of quick checks to see
- * if it is worth looking at further for free memory:
- *  1) Check that the zone isn't thought to be full (doesn't have its
- *     bit set in the zonelist_cache fullzones BITMAP).
- *  2) Check that the zones node (obtained from the zonelist_cache
- *     z_to_n[] mapping) is allowed in the passed in allowednodes mask.
- * Return true (non-zero) if zone is worth looking at further, or
- * else return false (zero) if it is not.
- *
- * This check -ignores- the distinction between various watermarks,
- * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ...  If a zone is
- * found to be full for any variation of these watermarks, it will
- * be considered full for up to one second by all requests, unless
- * we are so low on memory on all allowed nodes that we are forced
- * into the second scan of the zonelist.
- *
- * In the second scan we ignore this zonelist cache and exactly
- * apply the watermarks to all zones, even it is slower to do so.
- * We are low on memory in the second scan, and should leave no stone
- * unturned looking for a free page.
- */
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
-						nodemask_t *allowednodes)
-{
-	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
-	int i;				/* index of *z in zonelist zones */
-	int n;				/* node that zone *z is on */
-
-	zlc = zonelist->zlcache_ptr;
-	if (!zlc)
-		return 1;
-
-	i = z - zonelist->_zonerefs;
-	n = zlc->z_to_n[i];
-
-	/* This zone is worth trying if it is allowed but not full */
-	return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones);
-}
-
-/*
- * Given 'z' scanning a zonelist, set the corresponding bit in
- * zlc->fullzones, so that subsequent attempts to allocate a page
- * from that zone don't waste time re-examining it.
- */
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
-{
-	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
-	int i;				/* index of *z in zonelist zones */
-
-	zlc = zonelist->zlcache_ptr;
-	if (!zlc)
-		return;
-
-	i = z - zonelist->_zonerefs;
-
-	set_bit(i, zlc->fullzones);
-}
-
-/*
- * clear all zones full, called after direct reclaim makes progress so that
- * a zone that was recently full is not skipped over for up to a second
- */
-static void zlc_clear_zones_full(struct zonelist *zonelist)
-{
-	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
-
-	zlc = zonelist->zlcache_ptr;
-	if (!zlc)
-		return;
-
-	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-}
-
 static bool zone_local(struct zone *local_zone, struct zone *zone)
 {
 	return local_zone->node == zone->node;
@@ -2418,28 +2302,7 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 	return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
 				RECLAIM_DISTANCE;
 }
-
 #else	/* CONFIG_NUMA */
-
-static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
-{
-	return NULL;
-}
-
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
-				nodemask_t *allowednodes)
-{
-	return 1;
-}
-
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
-{
-}
-
-static void zlc_clear_zones_full(struct zonelist *zonelist)
-{
-}
-
 static bool zone_local(struct zone *local_zone, struct zone *zone)
 {
 	return true;
@@ -2449,7 +2312,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
 	return true;
 }
-
 #endif	/* CONFIG_NUMA */
 
 static void reset_alloc_batches(struct zone *preferred_zone)
@@ -2476,9 +2338,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct zoneref *z;
 	struct page *page = NULL;
 	struct zone *zone;
-	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
-	int zlc_active = 0;		/* set if using zonelist_cache */
-	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 	int nr_fair_skipped = 0;
 	bool zonelist_rescan;
 
@@ -2493,9 +2352,6 @@ zonelist_scan:
 								ac->nodemask) {
 		unsigned long mark;
 
-		if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
-			!zlc_zone_worth_trying(zonelist, z, allowednodes))
-				continue;
 		if (cpusets_enabled() &&
 			(alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed(zone, gfp_mask))
@@ -2553,28 +2409,8 @@ zonelist_scan:
 			if (alloc_flags & ALLOC_NO_WATERMARKS)
 				goto try_this_zone;
 
-			if (IS_ENABLED(CONFIG_NUMA) &&
-					!did_zlc_setup && nr_online_nodes > 1) {
-				/*
-				 * we do zlc_setup if there are multiple nodes
-				 * and before considering the first zone allowed
-				 * by the cpuset.
-				 */
-				allowednodes = zlc_setup(zonelist, alloc_flags);
-				zlc_active = 1;
-				did_zlc_setup = 1;
-			}
-
 			if (zone_reclaim_mode == 0 ||
 			    !zone_allows_reclaim(ac->preferred_zone, zone))
-				goto this_zone_full;
-
-			/*
-			 * As we may have just activated ZLC, check if the first
-			 * eligible zone has failed zone_reclaim recently.
-			 */
-			if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
-				!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
 
 			ret = zone_reclaim(zone, gfp_mask, order);
@@ -2591,19 +2427,6 @@ zonelist_scan:
 						ac->classzone_idx, alloc_flags))
 					goto try_this_zone;
 
-				/*
-				 * Failed to reclaim enough to meet watermark.
-				 * Only mark the zone full if checking the min
-				 * watermark or if we failed to reclaim just
-				 * 1<<order pages or else the page allocator
-				 * fastpath will prematurely mark zones full
-				 * when the watermark is between the low and
-				 * min watermarks.
-				 */
-				if (((alloc_flags & ALLOC_WMARK_MASK) == ALLOC_WMARK_MIN) ||
-				    ret == ZONE_RECLAIM_SOME)
-					goto this_zone_full;
-
 				continue;
 			}
 		}
@@ -2616,9 +2439,6 @@ try_this_zone:
 				goto try_this_zone;
 			return page;
 		}
-this_zone_full:
-		if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
-			zlc_mark_zone_full(zonelist, z);
 	}
 
 	/*
@@ -2639,12 +2459,6 @@ this_zone_full:
 			zonelist_rescan = true;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
-		/* Disable zlc cache for second zonelist scan */
-		zlc_active = 0;
-		zonelist_rescan = true;
-	}
-
 	if (zonelist_rescan)
 		goto zonelist_scan;
 
@@ -2889,10 +2703,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!(*did_some_progress)))
 		return NULL;
 
-	/* After successful reclaim, reconsider all zones for allocation */
-	if (IS_ENABLED(CONFIG_NUMA))
-		zlc_clear_zones_full(ac->zonelist);
-
 retry:
 	page = get_page_from_freelist(gfp_mask, order,
 					alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
@@ -4228,20 +4038,6 @@ static void build_zonelists(pg_data_t *pgdat)
 	build_thisnode_zonelists(pgdat);
 }
 
-/* Construct the zonelist performance cache - see further mmzone.h */
-static void build_zonelist_cache(pg_data_t *pgdat)
-{
-	struct zonelist *zonelist;
-	struct zonelist_cache *zlc;
-	struct zoneref *z;
-
-	zonelist = &pgdat->node_zonelists[0];
-	zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
-	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-	for (z = zonelist->_zonerefs; z->zone; z++)
-		zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
-}
-
 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
 /*
  * Return node id of node used for "local" allocations.
@@ -4302,12 +4098,6 @@ static void build_zonelists(pg_data_t *pgdat)
 	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
-/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
-static void build_zonelist_cache(pg_data_t *pgdat)
-{
-	pgdat->node_zonelists[0].zlcache_ptr = NULL;
-}
-
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -4348,14 +4138,12 @@ static int __build_all_zonelists(void *data)
 
 	if (self && !node_online(self->node_id)) {
 		build_zonelists(self);
-		build_zonelist_cache(self);
 	}
 
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 
 		build_zonelists(pgdat);
-		build_zonelist_cache(pgdat);
 	}
 
 	/*
-- 
cgit v1.2.3


From 974a786e63c96a2401a78ddba926f34c128474f1 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:34 -0800
Subject: mm, page_alloc: remove MIGRATE_RESERVE

MIGRATE_RESERVE preserves an old property of the buddy allocator that
existed prior to fragmentation avoidance -- min_free_kbytes worth of pages
tended to remain contiguous until the only alternative was to fail the
allocation.  At the time it was discovered that high-order atomic
allocations relied on this property so MIGRATE_RESERVE was introduced.  A
later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch
deletes MIGRATE_RESERVE and supporting code so it'll be easier to review.
Note that this patch in isolation may look like a false regression if
someone was bisecting high-order atomic allocation failures.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  10 +---
 mm/huge_memory.c       |   2 +-
 mm/page_alloc.c        | 148 +++----------------------------------------------
 mm/vmstat.c            |   1 -
 4 files changed, 11 insertions(+), 150 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e88aae329ff..b86cfa3313cf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -39,8 +39,6 @@ enum {
 	MIGRATE_UNMOVABLE,
 	MIGRATE_MOVABLE,
 	MIGRATE_RECLAIMABLE,
-	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
-	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
 	/*
 	 * MIGRATE_CMA migration type is designed to mimic the way
@@ -63,6 +61,8 @@ enum {
 	MIGRATE_TYPES
 };
 
+#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1)
+
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
 #else
@@ -429,12 +429,6 @@ struct zone {
 
 	const char		*name;
 
-	/*
-	 * Number of MIGRATE_RESERVE page block. To maintain for just
-	 * optimization. Protected by zone->lock.
-	 */
-	int			nr_migrate_reserve_block;
-
 #ifdef CONFIG_MEMORY_ISOLATION
 	/*
 	 * Number of isolated pageblock. It is used to solve incorrect
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9812d4618651..dabd247df535 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void)
 	for_each_populated_zone(zone)
 		nr_zones++;
 
-	/* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */
+	/* Ensure 2 pageblocks are free to assist fragmentation avoidance */
 	recommended_min = pageblock_nr_pages * nr_zones * 2;
 
 	/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8dc6e3cd40f0..588812614377 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			if (unlikely(has_isolate_pageblock(zone)))
 				mt = get_pageblock_migratetype(page);
 
-			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 			trace_mm_page_pcpu_drain(page, 0, mt);
 		} while (--to_free && --batch_free && !list_empty(list));
@@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
  * the free lists for the desirable migrate type are depleted
  */
 static int fallbacks[MIGRATE_TYPES][4] = {
-	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,     MIGRATE_RESERVE },
-	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,     MIGRATE_RESERVE },
-	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
+	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
+	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
 #ifdef CONFIG_CMA
-	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
+	[MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
 #endif
-	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
 #ifdef CONFIG_MEMORY_ISOLATION
-	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
+	[MIGRATE_ISOLATE]     = { MIGRATE_TYPES }, /* Never used */
 #endif
 };
 
@@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
 	*can_steal = false;
 	for (i = 0;; i++) {
 		fallback_mt = fallbacks[migratetype][i];
-		if (fallback_mt == MIGRATE_RESERVE)
+		if (fallback_mt == MIGRATE_TYPES)
 			break;
 
 		if (list_empty(&area->free_list[fallback_mt]))
@@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
 {
 	struct page *page;
 
-retry_reserve:
 	page = __rmqueue_smallest(zone, order, migratetype);
-
-	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
+	if (unlikely(!page)) {
 		if (migratetype == MIGRATE_MOVABLE)
 			page = __rmqueue_cma_fallback(zone, order);
 
 		if (!page)
 			page = __rmqueue_fallback(zone, order, migratetype);
-
-		/*
-		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
-		 * is used because __rmqueue_smallest is an inline function
-		 * and we want just one call site
-		 */
-		if (!page) {
-			migratetype = MIGRATE_RESERVE;
-			goto retry_reserve;
-		}
 	}
 
 	trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type)
 		[MIGRATE_UNMOVABLE]	= 'U',
 		[MIGRATE_RECLAIMABLE]	= 'E',
 		[MIGRATE_MOVABLE]	= 'M',
-		[MIGRATE_RESERVE]	= 'R',
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA]		= 'C',
 #endif
@@ -4302,120 +4287,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
 	return ffz(~size);
 }
 
-/*
- * Check if a pageblock contains reserved pages
- */
-static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
-{
-	unsigned long pfn;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Mark a number of pageblocks as MIGRATE_RESERVE. The number
- * of blocks reserved is based on min_wmark_pages(zone). The memory within
- * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
- * higher will lead to a bigger reserve which will get freed as contiguous
- * blocks as reclaim kicks in
- */
-static void setup_zone_migrate_reserve(struct zone *zone)
-{
-	unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
-	struct page *page;
-	unsigned long block_migratetype;
-	int reserve;
-	int old_reserve;
-
-	/*
-	 * Get the start pfn, end pfn and the number of blocks to reserve
-	 * We have to be careful to be aligned to pageblock_nr_pages to
-	 * make sure that we always check pfn_valid for the first page in
-	 * the block.
-	 */
-	start_pfn = zone->zone_start_pfn;
-	end_pfn = zone_end_pfn(zone);
-	start_pfn = roundup(start_pfn, pageblock_nr_pages);
-	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
-							pageblock_order;
-
-	/*
-	 * Reserve blocks are generally in place to help high-order atomic
-	 * allocations that are short-lived. A min_free_kbytes value that
-	 * would result in more than 2 reserve blocks for atomic allocations
-	 * is assumed to be in place to help anti-fragmentation for the
-	 * future allocation of hugepages at runtime.
-	 */
-	reserve = min(2, reserve);
-	old_reserve = zone->nr_migrate_reserve_block;
-
-	/* When memory hot-add, we almost always need to do nothing */
-	if (reserve == old_reserve)
-		return;
-	zone->nr_migrate_reserve_block = reserve;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
-		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
-			return;
-
-		if (!pfn_valid(pfn))
-			continue;
-		page = pfn_to_page(pfn);
-
-		/* Watch out for overlapping nodes */
-		if (page_to_nid(page) != zone_to_nid(zone))
-			continue;
-
-		block_migratetype = get_pageblock_migratetype(page);
-
-		/* Only test what is necessary when the reserves are not met */
-		if (reserve > 0) {
-			/*
-			 * Blocks with reserved pages will never free, skip
-			 * them.
-			 */
-			block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
-			if (pageblock_is_reserved(pfn, block_end_pfn))
-				continue;
-
-			/* If this block is reserved, account for it */
-			if (block_migratetype == MIGRATE_RESERVE) {
-				reserve--;
-				continue;
-			}
-
-			/* Suitable for reserving if this block is movable */
-			if (block_migratetype == MIGRATE_MOVABLE) {
-				set_pageblock_migratetype(page,
-							MIGRATE_RESERVE);
-				move_freepages_block(zone, page,
-							MIGRATE_RESERVE);
-				reserve--;
-				continue;
-			}
-		} else if (!old_reserve) {
-			/*
-			 * At boot time we don't need to scan the whole zone
-			 * for turning off MIGRATE_RESERVE.
-			 */
-			break;
-		}
-
-		/*
-		 * If the reserve is met and this is a previous reserved block,
-		 * take it back
-		 */
-		if (block_migratetype == MIGRATE_RESERVE) {
-			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-			move_freepages_block(zone, page, MIGRATE_MOVABLE);
-		}
-	}
-}
-
 /*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
@@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * movable at startup. This will force kernel allocations
 		 * to reserve their blocks rather than leaking throughout
 		 * the address space during boot when many long-lived
-		 * kernel allocations are made. Later some blocks near
-		 * the start are marked MIGRATE_RESERVE by
-		 * setup_zone_migrate_reserve()
+		 * kernel allocations are made.
 		 *
 		 * bitmap is created for zone's valid pfn range. but memmap
 		 * can be created for invalid pages (for alignment)
@@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void)
 			high_wmark_pages(zone) - low_wmark_pages(zone) -
 			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
 
-		setup_zone_migrate_reserve(zone);
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ffcb4f58bf3e..5b289dcdcccf 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
 	"Unmovable",
 	"Reclaimable",
 	"Movable",
-	"Reserve",
 #ifdef CONFIG_CMA
 	"CMA",
 #endif
-- 
cgit v1.2.3


From 0aaa29a56e4fb0fc9e24edb649e2733a672ca099 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:37 -0800
Subject: mm, page_alloc: reserve pageblocks for high-order atomic allocations
 on demand

High-order watermark checking exists for two reasons -- kswapd high-order
awareness and protection for high-order atomic requests.  Historically the
kernel depended on MIGRATE_RESERVE to preserve min_free_kbytes as
high-order free pages for as long as possible.  This patch introduces
MIGRATE_HIGHATOMIC that reserves pageblocks for high-order atomic
allocations on demand and avoids using those blocks for order-0
allocations.  This is more flexible and reliable than MIGRATE_RESERVE was.

A MIGRATE_HIGHATOMIC pageblock is created when an atomic high-order
allocation request steals a pageblock but limits the total number to 1% of
the zone.  Callers that speculatively abuse atomic allocations for
long-lived high-order allocations to access the reserve will quickly fail.
 Note that SLUB is currently not such an abuser as it reclaims at least
once.  It is possible that the pageblock stolen has few suitable
high-order pages and will need to steal again in the near future but there
would need to be strong justification to search all pageblocks for an
ideal candidate.

The pageblocks are unreserved if an allocation fails after a direct
reclaim attempt.

The watermark checks account for the reserved pageblocks when the
allocation request is not a high-order atomic allocation.

The reserved pageblocks can not be used for order-0 allocations.  This may
allow temporary wastage until a failed reclaim reassigns the pageblock.
This is deliberate as the intent of the reservation is to satisfy a
limited number of atomic high-order short-lived requests if the system
requires them.

The stutter benchmark was used to evaluate this but while it was running
there was a systemtap script that randomly allocated between 1 high-order
page and 12.5% of memory's worth of order-3 pages using GFP_ATOMIC.  This
is much larger than the potential reserve and it does not attempt to be
realistic.  It is intended to stress random high-order allocations from an
unknown source and show that there is a reduction in failures without
introducing an anomaly where atomic allocations are more reliable than
regular allocations.  The amount of memory reserved varied throughout the
workload as reserves were created and reclaimed under memory pressure.
The allocation failures once the workload warmed up were as follows;

4.2-rc5-vanilla		70%
4.2-rc5-atomic-reserve	56%

The failure rate was also measured while building multiple kernels.  The
failure rate was 14% but is 6% with this patch applied.

Overall, this is a small reduction but the reserves are small relative to
the number of allocation requests.  In early versions of the patch, the
failure rate reduced by a much larger amount but that required much larger
reserves and perversely made atomic allocations seem more reliable than
regular allocations.

[yalin.wang2010@gmail.com: fix redundant check and a memory leak]
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: yalin wang <yalin.wang2010@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |   6 ++-
 mm/page_alloc.c        | 138 ++++++++++++++++++++++++++++++++++++++++++++++---
 mm/vmstat.c            |   1 +
 3 files changed, 135 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b86cfa3313cf..d3bafe4ff32b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -39,6 +39,8 @@ enum {
 	MIGRATE_UNMOVABLE,
 	MIGRATE_MOVABLE,
 	MIGRATE_RECLAIMABLE,
+	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
+	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
 	/*
 	 * MIGRATE_CMA migration type is designed to mimic the way
@@ -61,8 +63,6 @@ enum {
 	MIGRATE_TYPES
 };
 
-#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1)
-
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
 #else
@@ -334,6 +334,8 @@ struct zone {
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];
 
+	unsigned long nr_reserved_highatomic;
+
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 588812614377..55e9c56dfe54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1615,6 +1615,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
 	return -1;
 }
 
+/*
+ * Reserve a pageblock for exclusive use of high-order atomic allocations if
+ * there are no empty page blocks that contain a page with a suitable order
+ */
+static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
+				unsigned int alloc_order)
+{
+	int mt;
+	unsigned long max_managed, flags;
+
+	/*
+	 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
+	 * Check is race-prone but harmless.
+	 */
+	max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+	if (zone->nr_reserved_highatomic >= max_managed)
+		return;
+
+	spin_lock_irqsave(&zone->lock, flags);
+
+	/* Recheck the nr_reserved_highatomic limit under the lock */
+	if (zone->nr_reserved_highatomic >= max_managed)
+		goto out_unlock;
+
+	/* Yoink! */
+	mt = get_pageblock_migratetype(page);
+	if (mt != MIGRATE_HIGHATOMIC &&
+			!is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+		zone->nr_reserved_highatomic += pageblock_nr_pages;
+		set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
+		move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Used when an allocation is about to fail under memory pressure. This
+ * potentially hurts the reliability of high-order allocations when under
+ * intense memory pressure but failed atomic allocations should be easier
+ * to recover from than an OOM.
+ */
+static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+{
+	struct zonelist *zonelist = ac->zonelist;
+	unsigned long flags;
+	struct zoneref *z;
+	struct zone *zone;
+	struct page *page;
+	int order;
+
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+								ac->nodemask) {
+		/* Preserve at least one pageblock */
+		if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++) {
+			struct free_area *area = &(zone->free_area[order]);
+
+			if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+				continue;
+
+			page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next,
+						struct page, lru);
+
+			/*
+			 * It should never happen but changes to locking could
+			 * inadvertently allow a per-cpu drain to add pages
+			 * to MIGRATE_HIGHATOMIC while unreserving so be safe
+			 * and watch for underflows.
+			 */
+			zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
+				zone->nr_reserved_highatomic);
+
+			/*
+			 * Convert to ac->migratetype and avoid the normal
+			 * pageblock stealing heuristics. Minimally, the caller
+			 * is doing the work and needs the pages. More
+			 * importantly, if the block was always converted to
+			 * MIGRATE_UNMOVABLE or another type then the number
+			 * of pageblocks that cannot be completely freed
+			 * may increase.
+			 */
+			set_pageblock_migratetype(page, ac->migratetype);
+			move_freepages_block(zone, page, ac->migratetype);
+			spin_unlock_irqrestore(&zone->lock, flags);
+			return;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
@@ -1670,7 +1765,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
  * Call me with the zone->lock already held.
  */
 static struct page *__rmqueue(struct zone *zone, unsigned int order,
-						int migratetype)
+				int migratetype, gfp_t gfp_flags)
 {
 	struct page *page;
 
@@ -1700,7 +1795,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		struct page *page = __rmqueue(zone, order, migratetype);
+		struct page *page = __rmqueue(zone, order, migratetype, 0);
 		if (unlikely(page == NULL))
 			break;
 
@@ -2072,7 +2167,7 @@ int split_free_page(struct page *page)
 static inline
 struct page *buffered_rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, int migratetype)
+			gfp_t gfp_flags, int alloc_flags, int migratetype)
 {
 	unsigned long flags;
 	struct page *page;
@@ -2115,7 +2210,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 			WARN_ON_ONCE(order > 1);
 		}
 		spin_lock_irqsave(&zone->lock, flags);
-		page = __rmqueue(zone, order, migratetype);
+
+		page = NULL;
+		if (alloc_flags & ALLOC_HARDER) {
+			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+			if (page)
+				trace_mm_page_alloc_zone_locked(page, order, migratetype);
+		}
+		if (!page)
+			page = __rmqueue(zone, order, migratetype, gfp_flags);
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
@@ -2226,15 +2329,24 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			unsigned long mark, int classzone_idx, int alloc_flags,
 			long free_pages)
 {
-	/* free_pages may go negative - that's OK */
 	long min = mark;
 	int o;
 	long free_cma = 0;
 
+	/* free_pages may go negative - that's OK */
 	free_pages -= (1 << order) - 1;
+
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
-	if (alloc_flags & ALLOC_HARDER)
+
+	/*
+	 * If the caller does not have rights to ALLOC_HARDER then subtract
+	 * the high-atomic reserves. This will over-estimate the size of the
+	 * atomic reserve but it avoids a search.
+	 */
+	if (likely(!(alloc_flags & ALLOC_HARDER)))
+		free_pages -= z->nr_reserved_highatomic;
+	else
 		min -= min / 4;
 
 #ifdef CONFIG_CMA
@@ -2419,10 +2531,18 @@ zonelist_scan:
 
 try_this_zone:
 		page = buffered_rmqueue(ac->preferred_zone, zone, order,
-						gfp_mask, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype);
 		if (page) {
 			if (prep_new_page(page, order, gfp_mask, alloc_flags))
 				goto try_this_zone;
+
+			/*
+			 * If this is a high-order atomic allocation then check
+			 * if the pageblock should be reserved for the future
+			 */
+			if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
+				reserve_highatomic_pageblock(page, zone, order);
+
 			return page;
 		}
 	}
@@ -2695,9 +2815,11 @@ retry:
 
 	/*
 	 * If an allocation failed after direct reclaim, it could be because
-	 * pages are pinned on the per-cpu lists. Drain them and try again
+	 * pages are pinned on the per-cpu lists or in high alloc reserves.
+	 * Shrink them them and try again
 	 */
 	if (!page && !drained) {
+		unreserve_highatomic_pageblock(ac);
 		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5b289dcdcccf..879a2be23325 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -923,6 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
 	"Unmovable",
 	"Reclaimable",
 	"Movable",
+	"HighAtomic",
 #ifdef CONFIG_CMA
 	"CMA",
 #endif
-- 
cgit v1.2.3


From dd56b046426760aa0c852ad6e4b6b07891222d65 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 6 Nov 2015 16:28:43 -0800
Subject: mm: page_alloc: hide some GFP internals and document the bits and
 flag combinations

Andrew stated the following

	We have quite a history of remote parts of the kernel using
	weird/wrong/inexplicable combinations of __GFP_ flags.	I tend
	to think that this is because we didn't adequately explain the
	interface.

	And I don't think that gfp.h really improved much in this area as
	a result of this patchset.  Could you go through it some time and
	decide if we've adequately documented all this stuff?

This patch first moves some GFP flag combinations that are part of the MM
internals to mm/internal.h. The rest of the patch documents the __GFP_FOO
bits under various headings and then documents the flag combinations. It
will not help callers that are brain damaged but the clarity might motivate
some fixes and avoid future mistakes.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 251 +++++++++++++++++++++++++++++++++++-----------------
 mm/internal.h       |  19 ++++
 mm/shmem.c          |   2 +
 mm/vmalloc.c        |   2 +
 4 files changed, 194 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 369227202ac2..6523109e136d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,9 +39,7 @@ struct vm_area_struct;
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
- * GFP bitmasks..
- *
- * Zone modifiers (see linux/mmzone.h - low three bits)
+ * Physical address zone modifiers (see linux/mmzone.h - low four bits)
  *
  * Do not put any conditional on these. If necessary modify the definitions
  * without the underscores and use them consistently. The definitions here may
@@ -51,120 +49,211 @@ struct vm_area_struct;
 #define __GFP_HIGHMEM	((__force gfp_t)___GFP_HIGHMEM)
 #define __GFP_DMA32	((__force gfp_t)___GFP_DMA32)
 #define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* Page is movable */
+#define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
 #define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+
 /*
- * Action modifiers - doesn't change the zoning
+ * Page mobility and placement hints
  *
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- * _might_ fail.  This depends upon the particular VM implementation.
+ * These flags provide hints about how mobile the page is. Pages with similar
+ * mobility are placed within the same pageblocks to minimise problems due
+ * to external fragmentation.
  *
- * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- * cannot handle allocation failures. New users should be evaluated carefully
- * (and the flag should be used only when there is no reasonable failure policy)
- * but it is definitely preferable to use the flag rather than opencode endless
- * loop around allocator.
+ * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ *   moved by page migration during memory compaction or can be reclaimed.
  *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- * return NULL when direct reclaim and memory compaction have failed to allow
- * the allocation to succeed.  The OOM killer is not called with the current
- * implementation.
+ * __GFP_RECLAIMABLE is used for slab allocations that specify
+ *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ *
+ * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ *   these pages will be spread between local zones to avoid all the dirty
+ *   pages being in one zone (fair zone allocation policy).
  *
- * __GFP_MOVABLE: Flag that this page will be movable by the page migration
- * mechanism or reclaimed
+ * __GFP_HARDWALL enforces the cpuset memory allocation policy.
+ *
+ * __GFP_THISNODE forces the allocation to be satisfied from the requested
+ *   node with no fallbacks or placement policy enforcements.
  */
-#define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)  /* Caller cannot wait or reschedule */
-#define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)	/* Should access emergency pools? */
-#define __GFP_IO	((__force gfp_t)___GFP_IO)	/* Can start physical IO? */
-#define __GFP_FS	((__force gfp_t)___GFP_FS)	/* Can call down to low-level FS? */
-#define __GFP_COLD	((__force gfp_t)___GFP_COLD)	/* Cache-cold page required */
-#define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)	/* Suppress page allocation failure warning */
-#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)	/* See above */
-#define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)	/* See above */
-#define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY) /* See above */
-#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
-#define __GFP_COMP	((__force gfp_t)___GFP_COMP)	/* Add compound page metadata */
-#define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)	/* Return zeroed page on success */
-#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves.
-							 * This takes precedence over the
-							 * __GFP_MEMALLOC flag if both are
-							 * set
-							 */
-#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
-#define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
-#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
-#define __GFP_NOACCOUNT	((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
-#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
-
-#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
+#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
+#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL)
+#define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)
 
 /*
- * A caller that is willing to wait may enter direct reclaim and will
- * wake kswapd to reclaim pages in the background until the high
- * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to
- * avoid unnecessary delays when a fallback option is available but
- * still allow kswapd to reclaim in the background. The kswapd flag
- * can be cleared when the reclaiming of pages would cause unnecessary
- * disruption.
+ * Watermark modifiers -- controls access to emergency reserves
+ *
+ * __GFP_HIGH indicates that the caller is high-priority and that granting
+ *   the request is necessary before the system can make forward progress.
+ *   For example, creating an IO context to clean pages.
+ *
+ * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
+ *   high priority. Users are typically interrupt handlers. This may be
+ *   used in conjunction with __GFP_HIGH
+ *
+ * __GFP_MEMALLOC allows access to all memory. This should only be used when
+ *   the caller guarantees the allocation will allow more memory to be freed
+ *   very shortly e.g. process exiting or swapping. Users either should
+ *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ *
+ * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
+ *
+ * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement.
  */
-#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)
+#define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)
+#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
+#define __GFP_NOACCOUNT	((__force gfp_t)___GFP_NOACCOUNT)
+
+/*
+ * Reclaim modifiers
+ *
+ * __GFP_IO can start physical IO.
+ *
+ * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ *   allocator recursing into the filesystem which might already be holding
+ *   locks.
+ *
+ * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ *   This flag can be cleared to avoid unnecessary delays when a fallback
+ *   option is available.
+ *
+ * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ *   the low watermark is reached and have it reclaim pages until the high
+ *   watermark is reached. A caller may wish to clear this flag when fallback
+ *   options are available and the reclaim is likely to disrupt the system. The
+ *   canonical example is THP allocation where a fallback is cheap but
+ *   reclaim/compaction may cause indirect stalls.
+ *
+ * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ *   _might_ fail.  This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ *   cannot handle allocation failures. New users should be evaluated carefully
+ *   (and the flag should be used only when there is no reasonable failure
+ *   policy) but it is definitely preferable to use the flag rather than
+ *   opencode endless loop around allocator.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
+ *   return NULL when direct reclaim and memory compaction have failed to allow
+ *   the allocation to succeed.  The OOM killer is not called with the current
+ *   implementation.
+ */
+#define __GFP_IO	((__force gfp_t)___GFP_IO)
+#define __GFP_FS	((__force gfp_t)___GFP_FS)
 #define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)
+#define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
+#define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
 /*
- * This may seem redundant, but it's a way of annotating false positives vs.
- * allocations that simply cannot be supported (e.g. page tables).
+ * Action modifiers
+ *
+ * __GFP_COLD indicates that the caller does not expect the page to be used in
+ *   the near future. Where possible, a cache-cold page will be returned.
+ *
+ * __GFP_NOWARN suppresses allocation failure reports.
+ *
+ * __GFP_COMP adds compound page metadata.
+ *
+ * __GFP_ZERO returns a zeroed page on success.
+ *
+ * __GFP_NOTRACK avoids tracking with kmemcheck.
+ *
+ * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
+ *   distinguishing in the source between false positives and allocations that
+ *   cannot be supported (e.g. page tables).
+ *
+ * __GFP_OTHER_NODE is for allocations that are on a remote node but that
+ *   should not be accounted for as a remote allocation in vmstat. A
+ *   typical user would be khugepaged collapsing a huge page on a remote
+ *   node.
  */
+#define __GFP_COLD	((__force gfp_t)___GFP_COLD)
+#define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
+#define __GFP_COMP	((__force gfp_t)___GFP_COMP)
+#define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
+#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
+#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
 
-#define __GFP_BITS_SHIFT 26	/* Room for N __GFP_FOO bits */
+/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 26
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /*
- * GFP_ATOMIC callers can not sleep, need the allocation to succeed.
- * A lower watermark is applied to allow access to "atomic reserves"
+ * Useful GFP flag combinations that are commonly used. It is recommended
+ * that subsystems start with one of these combinations and then set/clear
+ * __GFP_FOO flags as necessary.
+ *
+ * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ *   watermark is applied to allow access to "atomic reserves"
+ *
+ * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * GFP_NOWAIT is for kernel allocations that should not stall for direct
+ *   reclaim, start physical IO or use any filesystem callback.
+ *
+ * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ *   that do not require the starting of any physical IO.
+ *
+ * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ *
+ * GFP_USER is for userspace allocations that also need to be directly
+ *   accessible by the kernel or hardware. It is typically used by hardware
+ *   for buffers that are mapped to userspace (e.g. graphics) that hardware
+ *   still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * GFP_DMA exists for historical reasons and should be avoided where possible.
+ *   The flag indicates that the caller requires that the lowest zone be
+ *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ *   it would require careful auditing as some users really require it and
+ *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
+ *   lowest zone as a type of emergency reserve.
+ *
+ * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
+ *   address.
+ *
+ * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ *   do not need to be directly accessible by the kernel but that cannot
+ *   move once in use. An example may be a hardware allocation that maps
+ *   data directly into userspace but has no addressing limitations.
+ *
+ * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ *   need direct access to but can use kmap() when access is required. They
+ *   are expected to be movable via page reclaim or page migration. Typically,
+ *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
+ *
+ * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
+ *   that will fail quickly if memory is not available and will not wake
+ *   kswapd on failure.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
 #define GFP_NOIO	(__GFP_RECLAIM)
 #define GFP_NOFS	(__GFP_RECLAIM | __GFP_IO)
-#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_TEMPORARY	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
 			 __GFP_RECLAIMABLE)
 #define GFP_USER	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_DMA		__GFP_DMA
+#define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
 #define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
 			 ~__GFP_KSWAPD_RECLAIM)
 
-/* This mask makes up all the page movable related flags */
+/* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
 #define GFP_MOVABLE_SHIFT 3
 
-/* Control page allocator reclaim behavior */
-#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
-			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
-
-/* Control slab gfp mask during early boot */
-#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
-
-/* Control allocation constraints */
-#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
-
-/* Do not use these with a slab allocator */
-#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
-
-/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
-   platforms, used as appropriate on others */
-
-#define GFP_DMA		__GFP_DMA
-
-/* 4GB DMA on some platforms */
-#define GFP_DMA32	__GFP_DMA32
-
-/* Convert GFP flags to their corresponding migrate type */
 static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 {
 	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
@@ -177,6 +266,8 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 	/* Group based on mobility */
 	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
 }
+#undef GFP_MOVABLE_MASK
+#undef GFP_MOVABLE_SHIFT
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
diff --git a/mm/internal.h b/mm/internal.h
index ff0f1ada0f67..5b7841f6fa27 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -14,6 +14,25 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 
+/*
+ * The set of flags that only affect watermark checking and reclaim
+ * behaviour. This is used by the MM to obey the caller constraints
+ * about IO, FS and watermark checking while ignoring placement
+ * hints such as HIGHMEM usage.
+ */
+#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
+			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
+
+/* The GFP flags allowed during early boot */
+#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
+
+/* Control allocation cpuset and node placement constraints */
+#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
+
+/* Do not use these with a slab allocator */
+#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
+
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b8b73928398..9187eee4128b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -73,6 +73,8 @@ static struct vfsmount *shm_mnt;
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#include "internal.h"
+
 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7ee94dc10000..d04563480c94 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -35,6 +35,8 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+#include "internal.h"
+
 struct vfree_deferred {
 	struct llist_head list;
 	struct work_struct wq;
-- 
cgit v1.2.3


From 89903327607232de32f05100cf03f9390b858e0b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 6 Nov 2015 16:28:46 -0800
Subject: include/linux/mmzone.h: reflow comment

Someone has an 86 column display.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d3bafe4ff32b..e23a9e704536 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -337,12 +337,13 @@ struct zone {
 	unsigned long nr_reserved_highatomic;
 
 	/*
-	 * We don't know if the memory that we're going to allocate will be freeable
-	 * or/and it will be released eventually, so to avoid totally wasting several
-	 * GB of ram we must reserve some of the lower zone memory (otherwise we risk
-	 * to run OOM on the lower zones despite there's tons of freeable ram
-	 * on the higher zones). This array is recalculated at runtime if the
-	 * sysctl_lowmem_reserve_ratio sysctl changes.
+	 * We don't know if the memory that we're going to allocate will be
+	 * freeable or/and it will be released eventually, so to avoid totally
+	 * wasting several GB of ram we must reserve some of the lower zone
+	 * memory (otherwise we risk running OOM on the lower zones despite
+	 * there being tons of freeable ram on the higher zones).  This array is
+	 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
+	 * changes.
 	 */
 	long lowmem_reserve[MAX_NR_ZONES];
 
-- 
cgit v1.2.3


From c62d25556be6c965dc14288e796a576e8e39a7e9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 6 Nov 2015 16:28:49 -0800
Subject: mm, fs: introduce mapping_gfp_constraint()

There are many places which use mapping_gfp_mask to restrict a more
generic gfp mask which would be used for allocations which are not
directly related to the page cache but are performed in the same
context.

Let's introduce a helper function which makes the restriction explicit and
easier to track.  This patch doesn't introduce any functional changes.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Michal Hocko <mhocko@suse.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_gem.c       | 2 +-
 drivers/gpu/drm/i915/i915_gem.c | 3 +--
 fs/btrfs/compression.c          | 7 +++----
 fs/btrfs/ctree.h                | 2 +-
 fs/btrfs/free-space-cache.c     | 4 ++--
 fs/buffer.c                     | 2 +-
 fs/ceph/addr.c                  | 7 ++++---
 fs/cifs/file.c                  | 2 +-
 fs/ext4/inode.c                 | 2 +-
 fs/ext4/readpage.c              | 2 +-
 fs/logfs/segment.c              | 2 +-
 fs/mpage.c                      | 4 ++--
 fs/namei.c                      | 2 +-
 fs/nilfs2/inode.c               | 4 ++--
 fs/ntfs/file.c                  | 4 ++--
 fs/splice.c                     | 2 +-
 include/linux/pagemap.h         | 7 +++++++
 mm/filemap.c                    | 4 ++--
 mm/readahead.c                  | 4 ++--
 19 files changed, 36 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 3c2d4abd71c5..1d47d2e9487c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -491,7 +491,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
 		 * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping)
 		 * so shmem can relocate pages during swapin if required.
 		 */
-		BUG_ON((mapping_gfp_mask(mapping) & __GFP_DMA32) &&
+		BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) &&
 				(page_to_pfn(p) >= 0x00100000UL));
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e505d4be7c0..399aab265db3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2214,9 +2214,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 * Fail silently without starting the shrinker
 	 */
 	mapping = file_inode(obj->base.filp)->i_mapping;
-	gfp = mapping_gfp_mask(mapping);
+	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
-	gfp &= ~(__GFP_IO | __GFP_RECLAIM);
 	sg = st->sgl;
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 57ee8ca29b06..36dfeff2c1f4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			goto next;
 		}
 
-		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
-								~__GFP_FS);
+		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
+								 ~__GFP_FS));
 		if (!page)
 			break;
 
-		if (add_to_page_cache_lru(page, mapping, pg_index,
-								GFP_NOFS)) {
+		if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
 		}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33be80..eb90f0f1a124 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3316,7 +3316,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
 static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
 {
-	return mapping_gfp_mask(mapping) & ~__GFP_FS;
+	return mapping_gfp_constraint(mapping, ~__GFP_FS);
 }
 
 /* extent-tree.c */
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index abe3a66bd3ba..ed05da1b977e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,8 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 	}
 
 	mapping_set_gfp_mask(inode->i_mapping,
-			mapping_gfp_mask(inode->i_mapping) &
-			~(__GFP_FS | __GFP_HIGHMEM));
+			mapping_gfp_constraint(inode->i_mapping,
+			~(__GFP_FS | __GFP_HIGHMEM)));
 
 	return inode;
 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 82283abb2795..51aff0296ce2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	int ret = 0;		/* Will call free_more_memory() */
 	gfp_t gfp_mask;
 
-	gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
 
 	/*
 	 * XXX: __getblk_slow() can not really deal with failure and
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 9d23e788d1df..b7d218a168fb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		int ret1;
 		struct address_space *mapping = inode->i_mapping;
 		struct page *page = find_or_create_page(mapping, 0,
-						mapping_gfp_mask(mapping) &
-						~__GFP_FS);
+						mapping_gfp_constraint(mapping,
+						~__GFP_FS));
 		if (!page) {
 			ret = VM_FAULT_OOM;
 			goto out;
@@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 		if (i_size_read(inode) == 0)
 			return;
 		page = find_or_create_page(mapping, 0,
-					   mapping_gfp_mask(mapping) & ~__GFP_FS);
+					   mapping_gfp_constraint(mapping,
+					   ~__GFP_FS));
 		if (!page)
 			return;
 		if (PageUptodate(page)) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 47c5c97e2dd3..0068e82217c3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
 	struct page *page, *tpage;
 	unsigned int expected_index;
 	int rc;
-	gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
+	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
 
 	INIT_LIST_HEAD(tmplist);
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 612fbcf76b5c..60aaecd5598b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3344,7 +3344,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 	int err = 0;
 
 	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
-				   mapping_gfp_mask(mapping) & ~__GFP_FS);
+				   mapping_gfp_constraint(mapping, ~__GFP_FS));
 	if (!page)
 		return -ENOMEM;
 
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 560af0437704..1061611ae14d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
 			page = list_entry(pages->prev, struct page, lru);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping, page->index,
-					GFP_KERNEL & mapping_gfp_mask(mapping)))
+				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
 				goto next_page;
 		}
 
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 7f9b096d8d57..6de0fbfc6c00 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
 	filler_t *filler = super->s_devops->readpage;
 	struct page *page;
 
-	BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
+	BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
 	if (use_filler)
 		page = read_cache_page(mapping, index, filler, sb);
 	else {
diff --git a/fs/mpage.c b/fs/mpage.c
index 09abba7653aa..1480d3a18037 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	sector_t last_block_in_bio = 0;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
-	gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
+	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
 
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
@@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 	sector_t last_block_in_bio = 0;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
-	gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping);
+	gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 0d3340b32e14..3c18970a8899 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink);
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
 	return __page_symlink(inode, symname, len,
-			!(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
+			!mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
 }
 EXPORT_SYMBOL(page_symlink);
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 4a73d6dffabf..ac2f64943ff4 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 		goto failed;
 
 	mapping_set_gfp_mask(inode->i_mapping,
-			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
 
 	root = NILFS_I(dir)->i_root;
 	ii = NILFS_I(inode);
@@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb,
 	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
 	nilfs_set_inode_flags(inode);
 	mapping_set_gfp_mask(inode->i_mapping,
-			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
 	return 0;
 
  failed_unmap:
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 262561fea923..9d383e5eff0e 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
 				}
 			}
 			err = add_to_page_cache_lru(*cached_page, mapping,
-					index,
-					GFP_KERNEL & mapping_gfp_mask(mapping));
+				   index,
+				   mapping_gfp_constraint(mapping, GFP_KERNEL));
 			if (unlikely(err)) {
 				if (err == -EEXIST)
 					continue;
diff --git a/fs/splice.c b/fs/splice.c
index 5fc1e50a7f30..801c21cd77fe 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 				break;
 
 			error = add_to_page_cache_lru(page, mapping, index,
-					GFP_KERNEL & mapping_gfp_mask(mapping));
+				   mapping_gfp_constraint(mapping, GFP_KERNEL));
 			if (unlikely(error)) {
 				page_cache_release(page);
 				if (error == -EEXIST)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a6c78e00ea96..26eabf5ec718 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
 }
 
+/* Restricts the given gfp_mask to what the mapping allows. */
+static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
+		gfp_t gfp_mask)
+{
+	return mapping_gfp_mask(mapping) & gfp_mask;
+}
+
 /*
  * This is non-atomic.  Only to be used before the mapping is activated.
  * Probably needs a barrier...
diff --git a/mm/filemap.c b/mm/filemap.c
index 6ef3674c0763..1bb007624b53 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1722,7 +1722,7 @@ no_cached_page:
 			goto out;
 		}
 		error = add_to_page_cache_lru(page, mapping, index,
-					GFP_KERNEL & mapping_gfp_mask(mapping));
+				mapping_gfp_constraint(mapping, GFP_KERNEL));
 		if (error) {
 			page_cache_release(page);
 			if (error == -EEXIST) {
@@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset)
 			return -ENOMEM;
 
 		ret = add_to_page_cache_lru(page, mapping, offset,
-				GFP_KERNEL & mapping_gfp_mask(mapping));
+				mapping_gfp_constraint(mapping, GFP_KERNEL));
 		if (ret == 0)
 			ret = mapping->a_ops->readpage(file, page);
 		else if (ret == -EEXIST)
diff --git a/mm/readahead.c b/mm/readahead.c
index 998ad592f408..ba22d7fe0afb 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 		page = list_to_page(pages);
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping, page->index,
-				GFP_KERNEL & mapping_gfp_mask(mapping))) {
+				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
 			read_cache_pages_invalidate_page(mapping, page);
 			continue;
 		}
@@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping, page->index,
-				GFP_KERNEL & mapping_gfp_mask(mapping))) {
+				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
 			mapping->a_ops->readpage(filp, page);
 		}
 		page_cache_release(page);
-- 
cgit v1.2.3


From 3d9c637f4ae74b45d95bb6cbd793fbffad0a709c Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 6 Nov 2015 16:29:12 -0800
Subject: module: export param_free_charp()

Change the param_free_charp() function from static to exported.

It is used by zswap in the next patch ("zswap: use charp for zswap param
strings").

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Seth Jennings <sjennings@variantweb.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/moduleparam.h | 1 +
 kernel/params.c             | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index c12f2147c350..52666d90ca94 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp);
 extern const struct kernel_param_ops param_ops_charp;
 extern int param_set_charp(const char *val, const struct kernel_param *kp);
 extern int param_get_charp(char *buffer, const struct kernel_param *kp);
+extern void param_free_charp(void *arg);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
 /* We used to allow int as well as bool.  We're taking that away! */
diff --git a/kernel/params.c b/kernel/params.c
index b6554aa71094..93a380a2345d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -325,10 +325,11 @@ int param_get_charp(char *buffer, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL(param_get_charp);
 
-static void param_free_charp(void *arg)
+void param_free_charp(void *arg)
 {
 	maybe_kfree_parameter(*((char **)arg));
 }
+EXPORT_SYMBOL(param_free_charp);
 
 const struct kernel_param_ops param_ops_charp = {
 	.set = param_set_charp,
-- 
cgit v1.2.3


From 69e18f4dbedfbf208452e9da9979c92da30d2442 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 6 Nov 2015 16:29:18 -0800
Subject: zpool: remove redundant zpool->type string, const-ify zpool_get_type

Make the return type of zpool_get_type const; the string belongs to the
zpool driver and should not be modified.  Remove the redundant type field
in the struct zpool; it is private to zpool.c and isn't needed since
->driver->type can be used directly.  Add comments indicating strings must
be null-terminated.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zpool.h |  2 +-
 mm/zpool.c            | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 42f8ec992452..1f405bee3cd5 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -41,7 +41,7 @@ bool zpool_has_pool(char *type);
 struct zpool *zpool_create_pool(char *type, char *name,
 			gfp_t gfp, const struct zpool_ops *ops);
 
-char *zpool_get_type(struct zpool *pool);
+const char *zpool_get_type(struct zpool *pool);
 
 void zpool_destroy_pool(struct zpool *pool);
 
diff --git a/mm/zpool.c b/mm/zpool.c
index 8f670d3e8706..13f524dcf215 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -18,8 +18,6 @@
 #include <linux/zpool.h>
 
 struct zpool {
-	char *type;
-
 	struct zpool_driver *driver;
 	void *pool;
 	const struct zpool_ops *ops;
@@ -73,6 +71,7 @@ int zpool_unregister_driver(struct zpool_driver *driver)
 }
 EXPORT_SYMBOL(zpool_unregister_driver);
 
+/* this assumes @type is null-terminated. */
 static struct zpool_driver *zpool_get_driver(char *type)
 {
 	struct zpool_driver *driver;
@@ -113,6 +112,8 @@ static void zpool_put_driver(struct zpool_driver *driver)
  * not be loaded, and calling @zpool_create_pool() with the pool type will
  * fail.
  *
+ * The @type string must be null-terminated.
+ *
  * Returns: true if @type pool is available, false if not
  */
 bool zpool_has_pool(char *type)
@@ -145,6 +146,8 @@ EXPORT_SYMBOL(zpool_has_pool);
  *
  * Implementations must guarantee this to be thread-safe.
  *
+ * The @type and @name strings must be null-terminated.
+ *
  * Returns: New zpool on success, NULL on failure.
  */
 struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
@@ -174,7 +177,6 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
 		return NULL;
 	}
 
-	zpool->type = driver->type;
 	zpool->driver = driver;
 	zpool->pool = driver->create(name, gfp, ops, zpool);
 	zpool->ops = ops;
@@ -208,7 +210,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
  */
 void zpool_destroy_pool(struct zpool *zpool)
 {
-	pr_debug("destroying pool type %s\n", zpool->type);
+	pr_debug("destroying pool type %s\n", zpool->driver->type);
 
 	spin_lock(&pools_lock);
 	list_del(&zpool->list);
@@ -228,9 +230,9 @@ void zpool_destroy_pool(struct zpool *zpool)
  *
  * Returns: The type of zpool.
  */
-char *zpool_get_type(struct zpool *zpool)
+const char *zpool_get_type(struct zpool *zpool)
 {
-	return zpool->type;
+	return zpool->driver->type;
 }
 
 /**
-- 
cgit v1.2.3


From 6f3526d6db7cbe8b53e42d6bf0cad2072afcf3fe Mon Sep 17 00:00:00 2001
From: Sergey SENOZHATSKY <sergey.senozhatsky@gmail.com>
Date: Fri, 6 Nov 2015 16:29:21 -0800
Subject: mm: zsmalloc: constify struct zs_pool name

Constify `struct zs_pool' ->name.

[akpm@linux-foundation.org: constify zpool_create_pool()'s `type' arg also]
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zpool.h    |  6 ++++--
 include/linux/zsmalloc.h |  2 +-
 mm/zbud.c                |  2 +-
 mm/zpool.c               |  4 ++--
 mm/zsmalloc.c            | 10 +++++-----
 5 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 1f405bee3cd5..2e97b7707dff 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -38,7 +38,7 @@ enum zpool_mapmode {
 
 bool zpool_has_pool(char *type);
 
-struct zpool *zpool_create_pool(char *type, char *name,
+struct zpool *zpool_create_pool(const char *type, const char *name,
 			gfp_t gfp, const struct zpool_ops *ops);
 
 const char *zpool_get_type(struct zpool *pool);
@@ -83,7 +83,9 @@ struct zpool_driver {
 	atomic_t refcount;
 	struct list_head list;
 
-	void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops,
+	void *(*create)(const char *name,
+			gfp_t gfp,
+			const struct zpool_ops *ops,
 			struct zpool *zpool);
 	void (*destroy)(void *pool);
 
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 6398dfae53f1..34eb16098a33 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -41,7 +41,7 @@ struct zs_pool_stats {
 
 struct zs_pool;
 
-struct zs_pool *zs_create_pool(char *name, gfp_t flags);
+struct zs_pool *zs_create_pool(const char *name, gfp_t flags);
 void zs_destroy_pool(struct zs_pool *pool);
 
 unsigned long zs_malloc(struct zs_pool *pool, size_t size);
diff --git a/mm/zbud.c b/mm/zbud.c
index fa48bcdff9d5..d8a181fd779b 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -137,7 +137,7 @@ static const struct zbud_ops zbud_zpool_ops = {
 	.evict =	zbud_zpool_evict
 };
 
-static void *zbud_zpool_create(char *name, gfp_t gfp,
+static void *zbud_zpool_create(const char *name, gfp_t gfp,
 			       const struct zpool_ops *zpool_ops,
 			       struct zpool *zpool)
 {
diff --git a/mm/zpool.c b/mm/zpool.c
index 13f524dcf215..fd3ff719c32c 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -72,7 +72,7 @@ int zpool_unregister_driver(struct zpool_driver *driver)
 EXPORT_SYMBOL(zpool_unregister_driver);
 
 /* this assumes @type is null-terminated. */
-static struct zpool_driver *zpool_get_driver(char *type)
+static struct zpool_driver *zpool_get_driver(const char *type)
 {
 	struct zpool_driver *driver;
 
@@ -150,7 +150,7 @@ EXPORT_SYMBOL(zpool_has_pool);
  *
  * Returns: New zpool on success, NULL on failure.
  */
-struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp,
+struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
 		const struct zpool_ops *ops)
 {
 	struct zpool_driver *driver;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f135b1b6fcdc..8b8e0dac0a2a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -237,7 +237,7 @@ struct link_free {
 };
 
 struct zs_pool {
-	char *name;
+	const char *name;
 
 	struct size_class **size_class;
 	struct kmem_cache *handle_cachep;
@@ -311,7 +311,7 @@ static void record_obj(unsigned long handle, unsigned long obj)
 
 #ifdef CONFIG_ZPOOL
 
-static void *zs_zpool_create(char *name, gfp_t gfp,
+static void *zs_zpool_create(const char *name, gfp_t gfp,
 			     const struct zpool_ops *zpool_ops,
 			     struct zpool *zpool)
 {
@@ -548,7 +548,7 @@ static const struct file_operations zs_stat_size_ops = {
 	.release        = single_release,
 };
 
-static int zs_pool_stat_create(char *name, struct zs_pool *pool)
+static int zs_pool_stat_create(const char *name, struct zs_pool *pool)
 {
 	struct dentry *entry;
 
@@ -588,7 +588,7 @@ static void __exit zs_stat_exit(void)
 {
 }
 
-static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
+static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool)
 {
 	return 0;
 }
@@ -1866,7 +1866,7 @@ static int zs_register_shrinker(struct zs_pool *pool)
  * On success, a pointer to the newly created pool is returned,
  * otherwise NULL.
  */
-struct zs_pool *zs_create_pool(char *name, gfp_t flags)
+struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
 {
 	int i;
 	struct zs_pool *pool;
-- 
cgit v1.2.3


From 474e4eeaf26b6c3298ca3ae9d0a705b0853efb2a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 6 Nov 2015 16:29:40 -0800
Subject: mm: drop page->slab_page

Since 8456a648cf44 ("slab: use struct page for slab management") nobody
uses slab_page field in struct page.

Let's drop it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0a85da25a822..c0ec46df6c13 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -131,7 +131,6 @@ struct page {
 #endif
 		};
 
-		struct slab *slab_page; /* slab fields */
 		struct rcu_head rcu_head;	/* Used by SLAB
 						 * when destroying via RCU
 						 */
-- 
cgit v1.2.3


From f1e61557f0230d51a3df8d825f2c156e75563bff Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 6 Nov 2015 16:29:50 -0800
Subject: mm: pack compound_dtor and compound_order into one word in struct
 page

The patch halves space occupied by compound_dtor and compound_order in
struct page.

For compound_order, it's a trivial long -> short conversion.

For get_compound_page_dtor(), we now use a hardcoded table for destructor
lookup and store its index in struct page instead of a direct pointer
to the destructor. It shouldn't be much trouble to maintain the table: we
currently have only two destructors and NULL.

This patch frees up one word in tail pages for reuse. This is preparation
for the next patch.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 24 +++++++++++++++++++-----
 include/linux/mm_types.h |  6 ++----
 mm/hugetlb.c             |  8 ++++----
 mm/page_alloc.c          | 11 ++++++++++-
 4 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 906c46a05707..6581c21320cb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -568,18 +568,32 @@ int split_free_page(struct page *page);
 /*
  * Compound pages have a destructor function.  Provide a
  * prototype for that function and accessor functions.
- * These are _only_ valid on the head of a PG_compound page.
+ * These are _only_ valid on the head of a compound page.
  */
+typedef void compound_page_dtor(struct page *);
+
+/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */
+enum compound_dtor_id {
+	NULL_COMPOUND_DTOR,
+	COMPOUND_PAGE_DTOR,
+#ifdef CONFIG_HUGETLB_PAGE
+	HUGETLB_PAGE_DTOR,
+#endif
+	NR_COMPOUND_DTORS,
+};
+extern compound_page_dtor * const compound_page_dtors[];
 
 static inline void set_compound_page_dtor(struct page *page,
-						compound_page_dtor *dtor)
+		enum compound_dtor_id compound_dtor)
 {
-	page[1].compound_dtor = dtor;
+	VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
+	page[1].compound_dtor = compound_dtor;
 }
 
 static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 {
-	return page[1].compound_dtor;
+	VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
+	return compound_page_dtors[page[1].compound_dtor];
 }
 
 static inline int compound_order(struct page *page)
@@ -589,7 +603,7 @@ static inline int compound_order(struct page *page)
 	return page[1].compound_order;
 }
 
-static inline void set_compound_order(struct page *page, unsigned long order)
+static inline void set_compound_order(struct page *page, unsigned int order)
 {
 	page[1].compound_order = order;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c0ec46df6c13..e334ef79cb43 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,8 +28,6 @@ struct mem_cgroup;
 		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
 #define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
 
-typedef void compound_page_dtor(struct page *);
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -136,8 +134,8 @@ struct page {
 						 */
 		/* First tail page of compound page */
 		struct {
-			compound_page_dtor *compound_dtor;
-			unsigned long compound_order;
+			unsigned short int compound_dtor;
+			unsigned short int compound_order;
 		};
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 74ef0c6a25dd..e90a29024c5c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1146,7 +1146,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 				1 << PG_writeback);
 	}
 	VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-	set_compound_page_dtor(page, NULL);
+	set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
 	set_page_refcounted(page);
 	if (hstate_is_gigantic(h)) {
 		destroy_compound_gigantic_page(page, huge_page_order(h));
@@ -1242,7 +1242,7 @@ void free_huge_page(struct page *page)
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	INIT_LIST_HEAD(&page->lru);
-	set_compound_page_dtor(page, free_huge_page);
+	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
 	spin_lock(&hugetlb_lock);
 	set_hugetlb_cgroup(page, NULL);
 	h->nr_huge_pages++;
@@ -1294,7 +1294,7 @@ int PageHuge(struct page *page)
 		return 0;
 
 	page = compound_head(page);
-	return get_compound_page_dtor(page) == free_huge_page;
+	return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
 }
 EXPORT_SYMBOL_GPL(PageHuge);
 
@@ -1568,7 +1568,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	if (page) {
 		INIT_LIST_HEAD(&page->lru);
 		r_nid = page_to_nid(page);
-		set_compound_page_dtor(page, free_huge_page);
+		set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
 		set_hugetlb_cgroup(page, NULL);
 		/*
 		 * We incremented the global counters already
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b8d560afe266..fae1bd6f9f37 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -229,6 +229,15 @@ static char * const zone_names[MAX_NR_ZONES] = {
 #endif
 };
 
+static void free_compound_page(struct page *page);
+compound_page_dtor * const compound_page_dtors[] = {
+	NULL,
+	free_compound_page,
+#ifdef CONFIG_HUGETLB_PAGE
+	free_huge_page,
+#endif
+};
+
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
 
@@ -458,7 +467,7 @@ void prep_compound_page(struct page *page, unsigned long order)
 	int i;
 	int nr_pages = 1 << order;
 
-	set_compound_page_dtor(page, free_compound_page);
+	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 	set_compound_order(page, order);
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++) {
-- 
cgit v1.2.3


From 1d798ca3f16437c71ff63e36597ff07f9c12e4d6 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 6 Nov 2015 16:29:54 -0800
Subject: mm: make compound_head() robust

Hugh has pointed out that a compound_head() call can be unsafe in some
contexts. Here's one example:

	CPU0					CPU1

isolate_migratepages_block()
  page_count()
    compound_head()
      !!PageTail() == true
					put_page()
					  tail->first_page = NULL
      head = tail->first_page
					alloc_pages(__GFP_COMP)
					   prep_compound_page()
					     tail->first_page = head
					     __SetPageTail(p);
      !!PageTail() == true
    <head == NULL dereferencing>

The race is purely theoretical. I don't think it's possible to trigger it
in practice. But who knows.

We can fix the race by changing how we encode PageTail() and
compound_head() within struct page, so that both can be updated in one shot.

The patch introduces page->compound_head into the third double word block,
in front of compound_dtor and compound_order. Bit 0 encodes PageTail(), and
the remaining bits are a pointer to the head page if bit zero is set.

The patch moves page->pmd_huge_pte out of that word, just in case an
architecture defines pgtable_t as something that can have bit 0
set.

hugetlb_cgroup uses page->lru.next in the second tail page to store a
pointer to struct hugetlb_cgroup. The patch switches it to use page->private
in the second tail page instead. The space is free since ->first_page is
removed from the union.

The patch also opens up the possibility of removing the
HUGETLB_CGROUP_MIN_ORDER limitation, since there's now space in the first
tail page to store the struct hugetlb_cgroup pointer. But that's out of
scope for this patch.

That means page->compound_head shares storage space with:

 - page->lru.next;
 - page->next;
 - page->rcu_head.next;

That's too long a list to be absolutely sure, but it looks like nobody uses
bit 0 of the word.

page->rcu_head.next is guaranteed[1] to have bit 0 clear as long as we use
call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(). But a future
call_rcu_lazy() is not allowed, as it makes use of the bit and we could
get a false-positive PageTail().

[1] http://lkml.kernel.org/g/20150827163634.GD4029@linux.vnet.ibm.com

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/split_page_table_lock |  4 +-
 arch/xtensa/configs/iss_defconfig      |  1 -
 include/linux/hugetlb_cgroup.h         |  4 +-
 include/linux/mm.h                     | 53 ++--------------------
 include/linux/mm_types.h               | 22 ++++++++--
 include/linux/page-flags.h             | 80 ++++++++--------------------------
 mm/Kconfig                             | 12 -----
 mm/debug.c                             |  5 ---
 mm/huge_memory.c                       |  3 +-
 mm/hugetlb.c                           |  8 +---
 mm/hugetlb_cgroup.c                    |  2 +-
 mm/internal.h                          |  4 +-
 mm/memory-failure.c                    |  7 ---
 mm/page_alloc.c                        | 48 ++++++++++++--------
 mm/swap.c                              |  4 +-
 15 files changed, 82 insertions(+), 175 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
index 6dea4fd5c961..62842a857dab 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -54,8 +54,8 @@ everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
 which must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
-allocation: slab uses page->slab_cache and page->first_page for its pages.
-These fields share storage with page->ptl.
+allocation: slab uses page->slab_cache for its pages.
+This field shares storage with page->ptl.
 
 PMD split lock only makes sense if you have more than two page table
 levels.
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index f3dfe0d921c2..44c6764d9146 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -169,7 +169,6 @@ CONFIG_FLATMEM_MANUAL=y
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 7edd30515298..24154c26d469 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 
 	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
-	return (struct hugetlb_cgroup *)page[2].lru.next;
+	return (struct hugetlb_cgroup *)page[2].private;
 }
 
 static inline
@@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
 
 	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
 		return -1;
-	page[2].lru.next = (void *)h_cg;
+	page[2].private	= (unsigned long)h_cg;
 	return 0;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6581c21320cb..9671b6f23eda 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page,
 #endif
 }
 
-static inline struct page *compound_head_by_tail(struct page *tail)
-{
-	struct page *head = tail->first_page;
-
-	/*
-	 * page->first_page may be a dangling pointer to an old
-	 * compound page, so recheck that it is still a tail
-	 * page before returning.
-	 */
-	smp_rmb();
-	if (likely(PageTail(tail)))
-		return head;
-	return tail;
-}
-
-/*
- * Since either compound page could be dismantled asynchronously in THP
- * or we access asynchronously arbitrary positioned struct page, there
- * would be tail flag race. To handle this race, we should call
- * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
- */
-static inline struct page *compound_head(struct page *page)
-{
-	if (unlikely(PageTail(page)))
-		return compound_head_by_tail(page);
-	return page;
-}
-
-/*
- * If we access compound page synchronously such as access to
- * allocated page, there is no need to handle tail flag race, so we can
- * check tail flag directly without any synchronization primitive.
- */
-static inline struct page *compound_head_fast(struct page *page)
-{
-	if (unlikely(PageTail(page)))
-		return page->first_page;
-	return page;
-}
-
 /*
  * The atomic page->_mapcount, starts from -1: so that transitions
  * both from it and to it can be tracked, using atomic_inc_and_test
@@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page)
 	VM_BUG_ON_PAGE(!PageTail(page), page);
 	VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
 	VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
-	if (compound_tail_refcounted(page->first_page))
+	if (compound_tail_refcounted(compound_head(page)))
 		atomic_inc(&page->_mapcount);
 }
 
@@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
 
-	/*
-	 * We don't need to worry about synchronization of tail flag
-	 * when we call virt_to_head_page() since it is only called for
-	 * already allocated page and this page won't be freed until
-	 * this virt_to_head_page() is finished. So use _fast variant.
-	 */
-	return compound_head_fast(page);
+	return compound_head(page);
 }
 
 /*
@@ -1586,8 +1540,7 @@ static inline bool ptlock_init(struct page *page)
 	 * with 0. Make sure nobody took it in use in between.
 	 *
 	 * It can happen if arch try to use slab for page table allocation:
-	 * slab code uses page->slab_cache and page->first_page (for tail
-	 * pages), which share storage with page->ptl.
+	 * slab code uses page->slab_cache, which share storage with page->ptl.
 	 */
 	VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
 	if (!ptlock_alloc(page))
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e334ef79cb43..bb91658c603f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -111,7 +111,13 @@ struct page {
 		};
 	};
 
-	/* Third double word block */
+	/*
+	 * Third double word block
+	 *
+	 * WARNING: bit 0 of the first word encode PageTail(). That means
+	 * the rest users of the storage space MUST NOT use the bit to
+	 * avoid collision and false-positive PageTail().
+	 */
 	union {
 		struct list_head lru;	/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
@@ -132,14 +138,23 @@ struct page {
 		struct rcu_head rcu_head;	/* Used by SLAB
 						 * when destroying via RCU
 						 */
-		/* First tail page of compound page */
+		/* Tail pages of compound page */
 		struct {
+			unsigned long compound_head; /* If bit zero is set */
+
+			/* First tail page only */
 			unsigned short int compound_dtor;
 			unsigned short int compound_order;
 		};
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
-		pgtable_t pmd_huge_pte; /* protected by page->ptl */
+		struct {
+			unsigned long __pad;	/* do not overlay pmd_huge_pte
+						 * with compound_head to avoid
+						 * possible bit 0 collision.
+						 */
+			pgtable_t pmd_huge_pte; /* protected by page->ptl */
+		};
 #endif
 	};
 
@@ -160,7 +175,6 @@ struct page {
 #endif
 #endif
 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
-		struct page *first_page;	/* Compound tail pages */
 	};
 
 #ifdef CONFIG_MEMCG
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a525e5067484..bb53c7b86315 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,12 +86,7 @@ enum pageflags {
 	PG_private,		/* If pagecache, has fs-private data */
 	PG_private_2,		/* If pagecache, has fs aux data */
 	PG_writeback,		/* Page is under writeback */
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
 	PG_head,		/* A head page */
-	PG_tail,		/* A tail page */
-#else
-	PG_compound,		/* A compound page */
-#endif
 	PG_swapcache,		/* Swap page: swp_entry_t in private */
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
@@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page)
 	test_set_page_writeback_keepwrite(page);
 }
 
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
-/*
- * System with lots of page flags available. This allows separate
- * flags for PageHead() and PageTail() checks of compound pages so that bit
- * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths except arch/powerpc/mm/init_64.c
- * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
- * and avoid handling those in real mode.
- */
 __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
-__PAGEFLAG(Tail, tail)
 
-static inline int PageCompound(struct page *page)
-{
-	return page->flags & ((1L << PG_head) | (1L << PG_tail));
-
-}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void ClearPageCompound(struct page *page)
+static inline int PageTail(struct page *page)
 {
-	BUG_ON(!PageHead(page));
-	ClearPageHead(page);
+	return READ_ONCE(page->compound_head) & 1;
 }
-#endif
-
-#define PG_head_mask ((1L << PG_head))
 
-#else
-/*
- * Reduce page flag use as much as possible by overlapping
- * compound page flags with the flags used for page cache pages. Possible
- * because PageCompound is always set for compound pages and not for
- * pages on the LRU and/or pagecache.
- */
-TESTPAGEFLAG(Compound, compound)
-__SETPAGEFLAG(Head, compound)  __CLEARPAGEFLAG(Head, compound)
-
-/*
- * PG_reclaim is used in combination with PG_compound to mark the
- * head and tail of a compound page. This saves one page flag
- * but makes it impossible to use compound pages for the page cache.
- * The PG_reclaim bit would have to be used for reclaim or readahead
- * if compound pages enter the page cache.
- *
- * PG_compound & PG_reclaim	=> Tail page
- * PG_compound & ~PG_reclaim	=> Head page
- */
-#define PG_head_mask ((1L << PG_compound))
-#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
-
-static inline int PageHead(struct page *page)
+static inline void set_compound_head(struct page *page, struct page *head)
 {
-	return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+	WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
 }
 
-static inline int PageTail(struct page *page)
+static inline void clear_compound_head(struct page *page)
 {
-	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
+	WRITE_ONCE(page->compound_head, 0);
 }
 
-static inline void __SetPageTail(struct page *page)
+static inline struct page *compound_head(struct page *page)
 {
-	page->flags |= PG_head_tail_mask;
+	unsigned long head = READ_ONCE(page->compound_head);
+
+	if (unlikely(head & 1))
+		return (struct page *) (head - 1);
+	return page;
 }
 
-static inline void __ClearPageTail(struct page *page)
+static inline int PageCompound(struct page *page)
 {
-	page->flags &= ~PG_head_tail_mask;
-}
+	return PageHead(page) || PageTail(page);
 
+}
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void ClearPageCompound(struct page *page)
 {
-	BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound));
-	clear_bit(PG_compound, &page->flags);
+	BUG_ON(!PageHead(page));
+	ClearPageHead(page);
 }
 #endif
 
-#endif /* !PAGEFLAGS_EXTENDED */
+#define PG_head_mask ((1L << PG_head))
 
 #ifdef CONFIG_HUGETLB_PAGE
 int PageHuge(struct page *page);
diff --git a/mm/Kconfig b/mm/Kconfig
index 0d9fdcd01e47..97a4e06b15c0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE
 	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
 	depends on MIGRATION
 
-#
-# If we have space for more page flags then we can enable additional
-# optimizations and functionality.
-#
-# Regular Sparsemem takes page flag bits for the sectionid if it does not
-# use a virtual memmap. Disable extended page flags for 32 bit platforms
-# that require the use of a sectionid in the page flags.
-#
-config PAGEFLAGS_EXTENDED
-	def_bool y
-	depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM
-
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/debug.c b/mm/debug.c
index e784110fb51d..668aa35191ca 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = {
 	{1UL << PG_private,		"private"	},
 	{1UL << PG_private_2,		"private_2"	},
 	{1UL << PG_writeback,		"writeback"	},
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
 	{1UL << PG_head,		"head"		},
-	{1UL << PG_tail,		"tail"		},
-#else
-	{1UL << PG_compound,		"compound"	},
-#endif
 	{1UL << PG_swapcache,		"swapcache"	},
 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
 	{1UL << PG_reclaim,		"reclaim"	},
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 73266ee7274c..e1ccc83f73d3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page,
 				      (1L << PG_unevictable)));
 		page_tail->flags |= (1L << PG_dirty);
 
-		/* clear PageTail before overwriting first_page */
-		smp_wmb();
+		clear_compound_head(page_tail);
 
 		if (page_is_young(page))
 			set_page_young(page_tail);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e90a29024c5c..4eb0f0964883 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1001,9 +1001,8 @@ static void destroy_compound_gigantic_page(struct page *page,
 	struct page *p = page + 1;
 
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
-		__ClearPageTail(p);
+		clear_compound_head(p);
 		set_page_refcounted(p);
-		p->first_page = NULL;
 	}
 
 	set_compound_order(page, 0);
@@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 		 */
 		__ClearPageReserved(p);
 		set_page_count(p, 0);
-		p->first_page = page;
-		/* Make sure p->first_page is always valid for PageTail() */
-		smp_wmb();
-		__SetPageTail(p);
+		set_compound_head(p, page);
 	}
 }
 
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 33d59abe91f1..d8fb10de0f14 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void)
 		/*
 		 * Add cgroup control files only if the huge page consists
 		 * of more than two normal pages. This is because we use
-		 * page[2].lru.next for storing cgroup details.
+		 * page[2].private for storing cgroup details.
 		 */
 		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
 			__hugetlb_cgroup_file_init(hstate_index(h));
diff --git a/mm/internal.h b/mm/internal.h
index 5b7841f6fa27..a7f5670fea23 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -80,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page,
 	 * speculative page access (like in
 	 * page_cache_get_speculative()) on tail pages.
 	 */
-	VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page);
+	VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page);
 	if (get_page_head)
-		atomic_inc(&page->first_page->_count);
+		atomic_inc(&compound_head(page)->_count);
 	get_huge_page_tail(page);
 }
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 16a0ec385320..8424b64711ac 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 #define lru		(1UL << PG_lru)
 #define swapbacked	(1UL << PG_swapbacked)
 #define head		(1UL << PG_head)
-#define tail		(1UL << PG_tail)
-#define compound	(1UL << PG_compound)
 #define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)
 
@@ -800,12 +798,7 @@ static struct page_state {
 	 */
 	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },
 
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
 	{ head,		head,		MF_MSG_HUGE,		me_huge_page },
-	{ tail,		tail,		MF_MSG_HUGE,		me_huge_page },
-#else
-	{ compound,	compound,	MF_MSG_HUGE,		me_huge_page },
-#endif
 
 	{ sc|dirty,	sc|dirty,	MF_MSG_DIRTY_SWAPCACHE,	me_swapcache_dirty },
 	{ sc|dirty,	sc,		MF_MSG_CLEAN_SWAPCACHE,	me_swapcache_clean },
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fae1bd6f9f37..e361001519d3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,15 +445,15 @@ out:
 /*
  * Higher-order pages are called "compound pages".  They are structured thusly:
  *
- * The first PAGE_SIZE page is called the "head page".
+ * The first PAGE_SIZE page is called the "head page" and has PG_head set.
  *
- * The remaining PAGE_SIZE pages are called "tail pages".
+ * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
+ * in bit 0 of page->compound_head. The rest of bits is pointer to head page.
  *
- * All pages have PG_compound set.  All tail pages have their ->first_page
- * pointing at the head page.
+ * The first tail page's ->compound_dtor holds the offset in array of compound
+ * page destructors. See compound_page_dtors.
  *
- * The first tail page's ->lru.next holds the address of the compound page's
- * put_page() function.  Its ->lru.prev holds the order of allocation.
+ * The first tail page's ->compound_order holds the order of allocation.
  * This usage means that zero-order pages may not be compound.
  */
 
@@ -473,10 +473,7 @@ void prep_compound_page(struct page *page, unsigned long order)
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
 		set_page_count(p, 0);
-		p->first_page = page;
-		/* Make sure p->first_page is always valid for PageTail() */
-		smp_wmb();
-		__SetPageTail(p);
+		set_compound_head(p, page);
 	}
 }
 
@@ -854,17 +851,30 @@ static void free_one_page(struct zone *zone,
 
 static int free_tail_pages_check(struct page *head_page, struct page *page)
 {
-	if (!IS_ENABLED(CONFIG_DEBUG_VM))
-		return 0;
+	int ret = 1;
+
+	/*
+	 * We rely on page->lru.next never having bit 0 set, unless the page
+	 * is PageTail(). Let's make sure that's true even for poisoned ->lru.
+	 */
+	BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
+
+	if (!IS_ENABLED(CONFIG_DEBUG_VM)) {
+		ret = 0;
+		goto out;
+	}
 	if (unlikely(!PageTail(page))) {
 		bad_page(page, "PageTail not set", 0);
-		return 1;
+		goto out;
 	}
-	if (unlikely(page->first_page != head_page)) {
-		bad_page(page, "first_page not consistent", 0);
-		return 1;
+	if (unlikely(compound_head(page) != head_page)) {
+		bad_page(page, "compound_head not consistent", 0);
+		goto out;
 	}
-	return 0;
+	ret = 0;
+out:
+	clear_compound_head(page);
+	return ret;
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -931,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
 			struct page *page = pfn_to_page(start_pfn);
 
 			init_reserved_page(start_pfn);
+
+			/* Avoid false-positive PageTail() */
+			INIT_LIST_HEAD(&page->lru);
+
 			SetPageReserved(page);
 		}
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 983f692a47fd..39395fb549c0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -201,7 +201,7 @@ out_put_single:
 				__put_single_page(page);
 			return;
 		}
-		VM_BUG_ON_PAGE(page_head != page->first_page, page);
+		VM_BUG_ON_PAGE(page_head != compound_head(page), page);
 		/*
 		 * We can release the refcount taken by
 		 * get_page_unless_zero() now that
@@ -262,7 +262,7 @@ static void put_compound_page(struct page *page)
 	 *  Case 3 is possible, as we may race with
 	 *  __split_huge_page_refcount tearing down a THP page.
 	 */
-	page_head = compound_head_by_tail(page);
+	page_head = compound_head(page);
 	if (!__compound_tail_refcounted(page_head))
 		put_unrefcounted_compound_page(page_head, page);
 	else
-- 
cgit v1.2.3


From d00181b96eb86c914cb327d1de974a1b71366e1b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 6 Nov 2015 16:29:57 -0800
Subject: mm: use 'unsigned int' for page order

Let's try to be consistent about data type of page order.

[sfr@canb.auug.org.au: fix build (type of pageblock_order)]
[hughd@google.com: some configs end up with MAX_ORDER and pageblock_order having different types]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h              |  5 +++--
 include/linux/pageblock-flags.h |  2 +-
 mm/hugetlb.c                    | 19 ++++++++++---------
 mm/internal.h                   |  4 ++--
 mm/page_alloc.c                 | 29 ++++++++++++++++-------------
 5 files changed, 32 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9671b6f23eda..00bad7793788 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -550,7 +550,7 @@ static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
 	return compound_page_dtors[page[1].compound_dtor];
 }
 
-static inline int compound_order(struct page *page)
+static inline unsigned int compound_order(struct page *page)
 {
 	if (!PageHead(page))
 		return 0;
@@ -1810,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 
 extern __printf(3, 4)
-void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
+void warn_alloc_failed(gfp_t gfp_mask, unsigned int order,
+		const char *fmt, ...);
 
 extern void setup_per_cpu_pageset(void);
 
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 2baeee12f48e..e942558b3585 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -44,7 +44,7 @@ enum pageblock_bits {
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Huge page sizes are variable */
-extern int pageblock_order;
+extern unsigned int pageblock_order;
 
 #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4eb0f0964883..7ce07d681265 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -994,7 +994,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 
 #if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
 static void destroy_compound_gigantic_page(struct page *page,
-					unsigned long order)
+					unsigned int order)
 {
 	int i;
 	int nr_pages = 1 << order;
@@ -1009,7 +1009,7 @@ static void destroy_compound_gigantic_page(struct page *page,
 	__ClearPageHead(page);
 }
 
-static void free_gigantic_page(struct page *page, unsigned order)
+static void free_gigantic_page(struct page *page, unsigned int order)
 {
 	free_contig_range(page_to_pfn(page), 1 << order);
 }
@@ -1053,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 	return zone_spans_pfn(zone, last_pfn);
 }
 
-static struct page *alloc_gigantic_page(int nid, unsigned order)
+static struct page *alloc_gigantic_page(int nid, unsigned int order)
 {
 	unsigned long nr_pages = 1 << order;
 	unsigned long ret, pfn, flags;
@@ -1089,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
-static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+static void prep_compound_gigantic_page(struct page *page, unsigned int order);
 
 static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
 {
@@ -1122,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
 static inline bool gigantic_page_supported(void) { return true; }
 #else
 static inline bool gigantic_page_supported(void) { return false; }
-static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void free_gigantic_page(struct page *page, unsigned int order) { }
 static inline void destroy_compound_gigantic_page(struct page *page,
-						unsigned long order) { }
+						unsigned int order) { }
 static inline int alloc_fresh_gigantic_page(struct hstate *h,
 					nodemask_t *nodes_allowed) { return 0; }
 #endif
@@ -1250,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 	put_page(page); /* free it into the hugepage allocator */
 }
 
-static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned int order)
 {
 	int i;
 	int nr_pages = 1 << order;
@@ -1968,7 +1968,8 @@ found:
 	return 1;
 }
 
-static void __init prep_compound_huge_page(struct page *page, int order)
+static void __init prep_compound_huge_page(struct page *page,
+		unsigned int order)
 {
 	if (unlikely(order > (MAX_ORDER - 1)))
 		prep_compound_gigantic_page(page, order);
@@ -2679,7 +2680,7 @@ static int __init hugetlb_init(void)
 module_init(hugetlb_init);
 
 /* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
+void __init hugetlb_add_hstate(unsigned int order)
 {
 	struct hstate *h;
 	unsigned long i;
diff --git a/mm/internal.h b/mm/internal.h
index a7f5670fea23..38e24b89e4c4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -177,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
 					unsigned int order);
-extern void prep_compound_page(struct page *page, unsigned long order);
+extern void prep_compound_page(struct page *page, unsigned int order);
 #ifdef CONFIG_MEMORY_FAILURE
 extern bool is_free_buddy_page(struct page *page);
 #endif
@@ -235,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
  * page cannot be allocated or merged in parallel. Alternatively, it must
  * handle invalid values gracefully, and use page_order_unsafe() below.
  */
-static inline unsigned long page_order(struct page *page)
+static inline unsigned int page_order(struct page *page)
 {
 	/* PageBuddy() must be checked by the caller */
 	return page_private(page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e361001519d3..208e4c7e771b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,7 +181,7 @@ bool pm_suspended_storage(void)
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
-int pageblock_order __read_mostly;
+unsigned int pageblock_order __read_mostly;
 #endif
 
 static void __free_pages_ok(struct page *page, unsigned int order);
@@ -462,7 +462,7 @@ static void free_compound_page(struct page *page)
 	__free_pages_ok(page, compound_order(page));
 }
 
-void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned int order)
 {
 	int i;
 	int nr_pages = 1 << order;
@@ -662,7 +662,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long combined_idx;
 	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
-	int max_order = MAX_ORDER;
+	unsigned int max_order = MAX_ORDER;
 
 	VM_BUG_ON(!zone_is_initialized(zone));
 	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
@@ -675,7 +675,7 @@ static inline void __free_one_page(struct page *page,
 		 * pageblock. Without this, pageblock isolation
 		 * could cause incorrect freepage accounting.
 		 */
-		max_order = min(MAX_ORDER, pageblock_order + 1);
+		max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 	} else {
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	}
@@ -1471,7 +1471,7 @@ int move_freepages(struct zone *zone,
 			  int migratetype)
 {
 	struct page *page;
-	unsigned long order;
+	unsigned int order;
 	int pages_moved = 0;
 
 #ifndef CONFIG_HOLES_IN_ZONE
@@ -1584,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
 static void steal_suitable_fallback(struct zone *zone, struct page *page,
 							  int start_type)
 {
-	int current_order = page_order(page);
+	unsigned int current_order = page_order(page);
 	int pages;
 
 	/* Take ownership for orders >= pageblock_order */
@@ -2637,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
 		DEFAULT_RATELIMIT_INTERVAL,
 		DEFAULT_RATELIMIT_BURST);
 
-void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
 {
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
@@ -2671,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		va_end(args);
 	}
 
-	pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+	pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
 		current->comm, order, gfp_mask);
 
 	dump_stack();
@@ -3449,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order)
 	}
 }
 
-static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+static void *make_alloc_exact(unsigned long addr, unsigned int order,
+		size_t size)
 {
 	if (addr) {
 		unsigned long alloc_end = addr + (PAGE_SIZE << order);
@@ -3499,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  */
 void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
-	unsigned order = get_order(size);
+	unsigned int order = get_order(size);
 	struct page *p = alloc_pages_node(nid, gfp_mask, order);
 	if (!p)
 		return NULL;
@@ -3800,7 +3801,8 @@ void show_free_areas(unsigned int filter)
 	}
 
 	for_each_populated_zone(zone) {
-		unsigned long nr[MAX_ORDER], flags, order, total = 0;
+		unsigned int order;
+		unsigned long nr[MAX_ORDER], flags, total = 0;
 		unsigned char types[MAX_ORDER];
 
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4149,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat)
 	nodemask_t used_mask;
 	int local_node, prev_node;
 	struct zonelist *zonelist;
-	int order = current_zonelist_order;
+	unsigned int order = current_zonelist_order;
 
 	/* initialize zonelists */
 	for (i = 0; i < MAX_ZONELISTS; i++) {
@@ -6678,7 +6680,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		       unsigned migratetype)
 {
 	unsigned long outer_start, outer_end;
-	int ret = 0, order;
+	unsigned int order;
+	int ret = 0;
 
 	struct compact_control cc = {
 		.nr_migratepages = 0,
-- 
cgit v1.2.3


From 1965c8b7ac7dd147663faf77a66a693ac3ddcb85 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 6 Nov 2015 16:30:00 -0800
Subject: mm: use 'unsigned int' for compound_dtor/compound_order on 64BIT

On 64 bit system we have enough space in struct page to encode
compound_dtor and compound_order with unsigned int.

On x86-64 it leads to slightly smaller code size due to usage of plain
MOV instead of MOVZX (zero-extended move) or similar effect.

allyesconfig:

   text	   data	    bss	    dec	    hex	filename
159520446	48146736	72196096	279863278	10ae5fee	vmlinux.pre
159520382	48146736	72196096	279863214	10ae5fae	vmlinux.post

On other architectures without native support of 16-bit data types the
difference can be bigger.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb91658c603f..f8d1492a114f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -143,8 +143,19 @@ struct page {
 			unsigned long compound_head; /* If bit zero is set */
 
 			/* First tail page only */
+#ifdef CONFIG_64BIT
+			/*
+			 * On 64 bit system we have enough space in struct page
+			 * to encode compound_dtor and compound_order with
+			 * unsigned int. It can help compiler generate better or
+			 * smaller code on some architectures.
+			 */
+			unsigned int compound_dtor;
+			unsigned int compound_order;
+#else
 			unsigned short int compound_dtor;
 			unsigned short int compound_order;
+#endif
 		};
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
-- 
cgit v1.2.3


From 9add850c211a39d5ab1a091d48795e21599a73d0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 6 Nov 2015 16:30:09 -0800
Subject: include/linux/compiler-gcc.h: improve __visible documentation

Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0e3110a0b771..22ab246feed3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -205,7 +205,10 @@
 
 #if GCC_VERSION >= 40600
 /*
- * Tell the optimizer that something else uses this function or variable.
+ * When used with Link Time Optimization, gcc can optimize away C functions or
+ * variables which are referenced only from assembly code.  __visible tells the
+ * optimizer that something else uses this function or variable, thus preventing
+ * this.
  */
 #define __visible	__attribute__((externally_visible))
 #endif
-- 
cgit v1.2.3


From e2eb53aa96754b97d158eff884dde88abbad925e Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martink@posteo.de>
Date: Fri, 6 Nov 2015 16:30:58 -0800
Subject: bitops.h: improve sign_extend32()'s documentation

It is often overlooked that sign_extend32(), despite its name, is safe to
use for 16 and 8 bit types as well.  This should help prevent sign
extension being done manually some other way.

Signed-off-by: Martin Kepplinger <martin.kepplinger@theobroma-systems.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: George Spelvin <linux@horizon.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Maxime Coquelin <maxime.coquelin@st.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index e63553386ae7..5629923a8701 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
  * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit
  * @value: value to sign extend
  * @index: 0 based bit index (0<=index<32) to sign bit
+ *
+ * This is safe to use for 16- and 8-bit types as well.
  */
 static inline __s32 sign_extend32(__u32 value, int index)
 {
-- 
cgit v1.2.3


From 48e203e21b29cd4b2c58403fe8bca68e2e854895 Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martink@posteo.de>
Date: Fri, 6 Nov 2015 16:31:02 -0800
Subject: bitops.h: add sign_extend64()

Months back, this was discussed, see https://lkml.org/lkml/2015/1/18/289
The result was the 64-bit version being "likely fine", "valuable" and
"correct".  The discussion fell asleep but since there are possible users,
let's add it.

Signed-off-by: Martin Kepplinger <martin.kepplinger@theobroma-systems.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: George Spelvin <linux@horizon.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Maxime Coquelin <maxime.coquelin@st.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 5629923a8701..2b8ed123ad36 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -173,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index)
 	return (__s32)(value << shift) >> shift;
 }
 
+/**
+ * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit
+ * @value: value to sign extend
+ * @index: 0 based bit index (0<=index<64) to sign bit
+ */
+static inline __s64 sign_extend64(__u64 value, int index)
+{
+	__u8 shift = 63 - index;
+	return (__s64)(value << shift) >> shift;
+}
+
 static inline unsigned fls_long(unsigned long l)
 {
 	if (sizeof(l) == 4)
-- 
cgit v1.2.3


From 0a9df786a6ae2f898114bdd242b64920dedf53bd Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 6 Nov 2015 16:31:20 -0800
Subject: lib/kasprintf.c: introduce kvasprintf_const

This adds kvasprintf_const which tries to use kstrdup_const if possible:
If the format string contains no % characters, or if the format string is
exactly "%s", we delegate to kstrdup_const.  Otherwise, we fall back to
kvasprintf.

Just as for kstrdup_const, the main motivation is to save memory by
reusing .rodata when possible.

The return value should be freed by kfree_const, just like for
kstrdup_const.

There is deliberately no kasprintf_const: In the vast majority of cases,
the format string argument is a literal, so one can determine statically
whether one could instead use kstrdup_const directly (which would also
require one to change all corresponding kfree calls to kfree_const).

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |  2 ++
 lib/kasprintf.c        | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5582410727cb..2c13f747ac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -413,6 +413,8 @@ extern __printf(2, 3)
 char *kasprintf(gfp_t gfp, const char *fmt, ...);
 extern __printf(2, 0)
 char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
+extern __printf(2, 0)
+const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
 
 extern __scanf(2, 3)
 int sscanf(const char *, const char *, ...);
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index 32f12150fc4f..f194e6e593e1 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -31,6 +31,22 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
 }
 EXPORT_SYMBOL(kvasprintf);
 
+/*
+ * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt
+ * (or the sole vararg) points to rodata, we will then save a memory
+ * allocation and string copy. In any case, the return value should be
+ * freed using kfree_const().
+ */
+const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap)
+{
+	if (!strchr(fmt, '%'))
+		return kstrdup_const(fmt, gfp);
+	if (!strcmp(fmt, "%s"))
+		return kstrdup_const(va_arg(ap, const char*), gfp);
+	return kvasprintf(gfp, fmt, ap);
+}
+EXPORT_SYMBOL(kvasprintf_const);
+
 char *kasprintf(gfp_t gfp, const char *fmt, ...)
 {
 	va_list ap;
-- 
cgit v1.2.3


From 8de1ee7ebfb4979c6444e81273e12e7a972c367d Mon Sep 17 00:00:00 2001
From: Cody P Schafer <dev@codyps.com>
Date: Fri, 6 Nov 2015 16:31:28 -0800
Subject: rbtree: clarify documentation of
 rbtree_postorder_for_each_entry_safe()

I noticed that commit a20135ffbc44 ("writeback: don't drain
bdi_writeback_congested on bdi destruction") added a usage of
rbtree_postorder_for_each_entry_safe() in mm/backing-dev.c which appears
to try to rb_erase() elements from an rbtree while iterating over it using
rbtree_postorder_for_each_entry_safe().

Doing this will cause random nodes to be missed by the iteration because
rb_erase() may rebalance the tree, changing the ordering that we're trying
to iterate over.

The previous documentation for rbtree_postorder_for_each_entry_safe()
wasn't clear that this wasn't allowed, it was taken from the docs for
list_for_each_entry_safe(), where erasing isn't a problem due to
list_del() not reordering.

Explicitly warn developers about this potential pit-fall.

Note that I haven't fixed the actual issue that (it appears) the commit
referenced above introduced (not familiar enough with that code).

In general (and in this case), the patterns to follow are:
 - switch to rb_first() + rb_erase(), don't use
   rbtree_postorder_for_each_entry_safe().
 - keep the postorder iteration and don't rb_erase() at all. Instead
   just clear the fields of rb_node & cgwb_congested_tree as required by
   other users of those structures.

[akpm@linux-foundation.org: tweak comments]
Signed-off-by: Cody P Schafer <dev@codyps.com>
Cc: John de la Garza <john@jjdev.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 830c4992088d..a5aa7ae671f4 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent
 	})
 
 /**
- * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
- * given type safe against removal of rb_node entry
+ * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
+ * given type allowing the backing memory of @pos to be invalidated
  *
  * @pos:	the 'type *' to use as a loop cursor.
  * @n:		another 'type *' to use as temporary storage
  * @root:	'rb_root *' of the rbtree.
  * @field:	the name of the rb_node field within 'type'.
+ *
+ * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
+ * list_for_each_entry_safe() and allows the iteration to continue independent
+ * of changes to @pos by the body of the loop.
+ *
+ * Note, however, that it cannot handle other modifications that re-order the
+ * rbtree it is iterating over. This includes calling rb_erase() on @pos, as
+ * rb_erase() may rebalance the tree, causing us to miss some nodes.
  */
 #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
 	for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
-- 
cgit v1.2.3


From 2e01fabe67ccaff1d59bda01e60a61f5fb0aa7b6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Nov 2015 16:32:19 -0800
Subject: signals: kill block_all_signals() and unblock_all_signals()

It is hardly possible to enumerate all problems with block_all_signals()
and unblock_all_signals().  Just for example,

1. block_all_signals(SIGSTOP/etc) simply can't help if the caller is
   multithreaded. Another thread can dequeue the signal and force the
   group stop.

2. Even if the caller is single-threaded, it will "stop" anyway. It
   will not sleep, but it will spin in kernel space until SIGCONT or
   SIGKILL.

And a lot more. In short, this interface doesn't work at all, at least
for the last 10+ years.

Daniel said:

  Yeah the only times I played around with the DRM_LOCK stuff was when
  old drivers accidentally deadlocked - my impression is that the entire
  DRM_LOCK thing was never really tested properly ;-) Hence I'm all for
  purging where this leaks out of the drm subsystem.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Dave Airlie <airlied@redhat.com>
Cc: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_lock.c | 41 -------------------------------------
 include/drm/drmP.h         |  1 -
 include/linux/sched.h      |  7 +------
 kernel/signal.c            | 51 +---------------------------------------------
 4 files changed, 2 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 4924d381b664..daa2ff12101b 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -38,8 +38,6 @@
 #include "drm_legacy.h"
 #include "drm_internal.h"
 
-static int drm_notifier(void *priv);
-
 static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
 
 /**
@@ -118,14 +116,8 @@ int drm_legacy_lock(struct drm_device *dev, void *data,
 	 * really probably not the correct answer but lets us debug xkb
  	 * xserver for now */
 	if (!file_priv->is_master) {
-		sigemptyset(&dev->sigmask);
-		sigaddset(&dev->sigmask, SIGSTOP);
-		sigaddset(&dev->sigmask, SIGTSTP);
-		sigaddset(&dev->sigmask, SIGTTIN);
-		sigaddset(&dev->sigmask, SIGTTOU);
 		dev->sigdata.context = lock->context;
 		dev->sigdata.lock = master->lock.hw_lock;
-		block_all_signals(drm_notifier, dev, &dev->sigmask);
 	}
 
 	if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT))
@@ -169,7 +161,6 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_
 		/* FIXME: Should really bail out here. */
 	}
 
-	unblock_all_signals();
 	return 0;
 }
 
@@ -287,38 +278,6 @@ int drm_legacy_lock_free(struct drm_lock_data *lock_data, unsigned int context)
 	return 0;
 }
 
-/**
- * If we get here, it means that the process has called DRM_IOCTL_LOCK
- * without calling DRM_IOCTL_UNLOCK.
- *
- * If the lock is not held, then let the signal proceed as usual.  If the lock
- * is held, then set the contended flag and keep the signal blocked.
- *
- * \param priv pointer to a drm_device structure.
- * \return one if the signal should be delivered normally, or zero if the
- * signal should be blocked.
- */
-static int drm_notifier(void *priv)
-{
-	struct drm_device *dev = priv;
-	struct drm_hw_lock *lock = dev->sigdata.lock;
-	unsigned int old, new, prev;
-
-	/* Allow signal delivery if lock isn't held */
-	if (!lock || !_DRM_LOCK_IS_HELD(lock->lock)
-	    || _DRM_LOCKING_CONTEXT(lock->lock) != dev->sigdata.context)
-		return 1;
-
-	/* Otherwise, set flag to force call to
-	   drmUnlock */
-	do {
-		old = lock->lock;
-		new = old | _DRM_LOCK_CONT;
-		prev = cmpxchg(&lock->lock, old, new);
-	} while (prev != old);
-	return 0;
-}
-
 /**
  * This function returns immediately and takes the hw lock
  * with the kernel context if it is free, otherwise it gets the highest priority when and if
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8b5ce7c5d9bb..f56cdcecc1c9 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -822,7 +822,6 @@ struct drm_device {
 
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	unsigned int num_crtcs;                  /**< Number of CRTCs on this device */
-	sigset_t sigmask;
 
 	struct {
 		int context;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eeb5066a44fb..923ec1a9b2b4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1570,9 +1570,7 @@ struct task_struct {
 
 	unsigned long sas_ss_sp;
 	size_t sas_ss_size;
-	int (*notifier)(void *priv);
-	void *notifier_data;
-	sigset_t *notifier_mask;
+
 	struct callback_head *task_works;
 
 	struct audit_context *audit_context;
@@ -2476,9 +2474,6 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
 	return ret;
 }
 
-extern void block_all_signals(int (*notifier)(void *priv), void *priv,
-			      sigset_t *mask);
-extern void unblock_all_signals(void);
 extern void release_task(struct task_struct * p);
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int force_sigsegv(int, struct task_struct *);
diff --git a/kernel/signal.c b/kernel/signal.c
index 0f6bbbe77b46..f2cbd4ed5cd4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -503,41 +503,6 @@ int unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-/*
- * Notify the system that a driver wants to block all signals for this
- * process, and wants to be notified if any signals at all were to be
- * sent/acted upon.  If the notifier routine returns non-zero, then the
- * signal will be acted upon after all.  If the notifier routine returns 0,
- * then then signal will be blocked.  Only one block per process is
- * allowed.  priv is a pointer to private data that the notifier routine
- * can use to determine if the signal should be blocked or not.
- */
-void
-block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->notifier_mask = mask;
-	current->notifier_data = priv;
-	current->notifier = notifier;
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-
-/* Notify the system that blocking has ended. */
-
-void
-unblock_all_signals(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->notifier = NULL;
-	current->notifier_data = NULL;
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-}
-
 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 {
 	struct sigqueue *q, *first = NULL;
@@ -580,19 +545,8 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 {
 	int sig = next_signal(pending, mask);
 
-	if (sig) {
-		if (current->notifier) {
-			if (sigismember(current->notifier_mask, sig)) {
-				if (!(current->notifier)(current->notifier_data)) {
-					clear_thread_flag(TIF_SIGPENDING);
-					return 0;
-				}
-			}
-		}
-
+	if (sig)
 		collect_signal(sig, pending, info);
-	}
-
 	return sig;
 }
 
@@ -2483,9 +2437,6 @@ EXPORT_SYMBOL(force_sig);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
 EXPORT_SYMBOL(sigprocmask);
-EXPORT_SYMBOL(block_all_signals);
-EXPORT_SYMBOL(unblock_all_signals);
-
 
 /*
  * System call entry points.
-- 
cgit v1.2.3


From be0e6f290f78b84a3b21b8c8c46819c4514fe632 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Nov 2015 16:32:22 -0800
Subject: signal: turn dequeue_signal_lock() into kernel_dequeue_signal()

1. Rename dequeue_signal_lock() to kernel_dequeue_signal(). This
   matches another "for kthreads only" kernel_sigaction() helper.

2. Remove the "tsk" and "mask" arguments, they are always current
   and current->blocked. And it is simply wrong if tsk != current.

3. We could also remove the 3rd "siginfo_t *info" arg but it looks
   potentially useful. However we can simplify the callers if we
   change kernel_dequeue_signal() to accept info => NULL.

4. Remove _irqsave, it is never called from atomic context.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/nbd.c                          | 15 ++++-----------
 drivers/usb/gadget/function/f_mass_storage.c |  4 +---
 fs/jffs2/background.c                        |  3 +--
 include/linux/sched.h                        | 11 ++++++-----
 4 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1b87623381e2..93b3f99b6865 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -444,9 +444,7 @@ static int nbd_thread_recv(struct nbd_device *nbd)
 	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
 
 	if (signal_pending(current)) {
-		siginfo_t info;
-
-		ret = dequeue_signal_lock(current, &current->blocked, &info);
+		ret = kernel_dequeue_signal(NULL);
 		dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
 			 task_pid_nr(current), current->comm, ret);
 		mutex_lock(&nbd->tx_lock);
@@ -560,11 +558,8 @@ static int nbd_thread_send(void *data)
 					 !list_empty(&nbd->waiting_queue));
 
 		if (signal_pending(current)) {
-			siginfo_t info;
-			int ret;
+			int ret = kernel_dequeue_signal(NULL);
 
-			ret = dequeue_signal_lock(current, &current->blocked,
-						  &info);
 			dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
 				 task_pid_nr(current), current->comm, ret);
 			mutex_lock(&nbd->tx_lock);
@@ -592,10 +587,8 @@ static int nbd_thread_send(void *data)
 	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
 
 	/* Clear maybe pending signals */
-	if (signal_pending(current)) {
-		siginfo_t info;
-		dequeue_signal_lock(current, &current->blocked, &info);
-	}
+	if (signal_pending(current))
+		kernel_dequeue_signal(NULL);
 
 	return 0;
 }
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index cd54e72a6c50..5ec533826621 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2345,7 +2345,6 @@ static void fsg_disable(struct usb_function *f)
 
 static void handle_exception(struct fsg_common *common)
 {
-	siginfo_t		info;
 	int			i;
 	struct fsg_buffhd	*bh;
 	enum fsg_state		old_state;
@@ -2357,8 +2356,7 @@ static void handle_exception(struct fsg_common *common)
 	 * into a high-priority EXIT exception.
 	 */
 	for (;;) {
-		int sig =
-			dequeue_signal_lock(current, &current->blocked, &info);
+		int sig = kernel_dequeue_signal(NULL);
 		if (!sig)
 			break;
 		if (sig != SIGUSR1) {
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index bb9cebc9ca8a..f3145fd86d86 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -121,13 +121,12 @@ static int jffs2_garbage_collect_thread(void *_c)
 		/* Put_super will send a SIGKILL and then wait on the sem.
 		 */
 		while (signal_pending(current) || freezing(current)) {
-			siginfo_t info;
 			unsigned long signr;
 
 			if (try_to_freeze())
 				goto again;
 
-			signr = dequeue_signal_lock(current, &current->blocked, &info);
+			signr = kernel_dequeue_signal(NULL);
 
 			switch(signr) {
 			case SIGSTOP:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 923ec1a9b2b4..3d54924b4b86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2462,14 +2462,15 @@ extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
 
-static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
+static inline int kernel_dequeue_signal(siginfo_t *info)
 {
-	unsigned long flags;
+	struct task_struct *tsk = current;
+	siginfo_t __info;
 	int ret;
 
-	spin_lock_irqsave(&tsk->sighand->siglock, flags);
-	ret = dequeue_signal(tsk, mask, info);
-	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+	spin_lock_irq(&tsk->sighand->siglock);
+	ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
+	spin_unlock_irq(&tsk->sighand->siglock);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 9a13049e83f346cb1cbd60c64e520a73c396af16 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Nov 2015 16:32:25 -0800
Subject: signal: introduce kernel_signal_stop() to fix
 jffs2_garbage_collect_thread()

jffs2_garbage_collect_thread() can race with SIGCONT and sleep in
TASK_STOPPED state after SIGCONT was already sent. Add the new helper,
kernel_signal_stop(), which does this correctly.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jffs2/background.c |  3 +--
 include/linux/sched.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index f3145fd86d86..53cc7350af33 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -132,8 +132,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 			case SIGSTOP:
 				jffs2_dbg(1, "%s(): SIGSTOP received\n",
 					  __func__);
-				set_current_state(TASK_STOPPED);
-				schedule();
+				kernel_signal_stop();
 				break;
 
 			case SIGKILL:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d54924b4b86..4069febaa34a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2475,6 +2475,16 @@ static inline int kernel_dequeue_signal(siginfo_t *info)
 	return ret;
 }
 
+static inline void kernel_signal_stop(void)
+{
+	spin_lock_irq(&current->sighand->siglock);
+	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+		__set_current_state(TASK_STOPPED);
+	spin_unlock_irq(&current->sighand->siglock);
+
+	schedule();
+}
+
 extern void release_task(struct task_struct * p);
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int force_sigsegv(int, struct task_struct *);
-- 
cgit v1.2.3


From 002edb6f6f2a79bea50de11260ddc9572e6db731 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 6 Nov 2015 16:32:51 -0800
Subject: dma-mapping: tidy up dma_parms default handling

Many DMA controllers and other devices set max_segment_size to
indicate their scatter-gather capability, but have no interest in
segment_boundary_mask. However, the existence of a dma_parms structure
precludes the use of any default value, leaving them as zeros (assuming
a properly kzalloc'ed structure). If a well-behaved IOMMU (or SWIOTLB)
then tries to respect this by ensuring a mapped segment does not cross
a zero-byte boundary, hilarity ensues.

Since zero is a nonsensical value for either parameter, treat it as an
indicator for "default", as might be expected. In the process, clean up
a bit by replacing the bare constants with slightly more meaningful
macros and removing the superfluous "else" statements.

[akpm@linux-foundation.org: dma-mapping.h needs sizes.h for SZ_64K]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Sumit Semwal <sumit.semwal@linaro.org>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sakari Ailus <sakari.ailus@iki.fi>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ac07ff090919..2e551e2d2d03 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_DMA_MAPPING_H
 #define _LINUX_DMA_MAPPING_H
 
+#include <linux/sizes.h>
 #include <linux/string.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { }
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
 {
-	return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536;
+	if (dev->dma_parms && dev->dma_parms->max_segment_size)
+		return dev->dma_parms->max_segment_size;
+	return SZ_64K;
 }
 
 static inline unsigned int dma_set_max_seg_size(struct device *dev,
@@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev,
 	if (dev->dma_parms) {
 		dev->dma_parms->max_segment_size = size;
 		return 0;
-	} else
-		return -EIO;
+	}
+	return -EIO;
 }
 
 static inline unsigned long dma_get_seg_boundary(struct device *dev)
 {
-	return dev->dma_parms ?
-		dev->dma_parms->segment_boundary_mask : 0xffffffff;
+	if (dev->dma_parms && dev->dma_parms->segment_boundary_mask)
+		return dev->dma_parms->segment_boundary_mask;
+	return DMA_BIT_MASK(32);
 }
 
 static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
@@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
 	if (dev->dma_parms) {
 		dev->dma_parms->segment_boundary_mask = mask;
 		return 0;
-	} else
-		return -EIO;
+	}
+	return -EIO;
 }
 
 #ifndef dma_max_pfn
-- 
cgit v1.2.3


From cb7ae262e230064ba282094b7e1f60a092448b72 Mon Sep 17 00:00:00 2001
From: Anish Bhatt <anish@chelsio.com>
Date: Fri, 6 Nov 2015 16:33:01 -0800
Subject: include/linux/zutil.h: fix usage example of zlib_adler32()

adler32 has been named zlib_adler32 since before 2.6.11.

Signed-off-by: Anish Bhatt <anish@chelsio.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zutil.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zutil.h b/include/linux/zutil.h
index 6adfa9a6ffe9..663689521759 100644
--- a/include/linux/zutil.h
+++ b/include/linux/zutil.h
@@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf,
    An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
    much faster. Usage example:
 
-     uLong adler = adler32(0L, NULL, 0);
+     uLong adler = zlib_adler32(0L, NULL, 0);
 
      while (read_buffer(buffer, length) != EOF) {
-       adler = adler32(adler, buffer, length);
+       adler = zlib_adler32(adler, buffer, length);
      }
      if (adler != original_adler) error();
 */
-- 
cgit v1.2.3


From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Nov 2015 11:21:47 +0100
Subject: netfilter: ipset: Fix extension alignment

The data extensions in ipset lacked the proper memory alignment and
thus could lead to a kernel crash on several architectures. Therefore
the structures have been reorganized and alignment attributes added
where needed. The patch was tested on armv7h by Gerhard Wiesinger and
on x86_64, sparc64 by Jozsef Kadlecsik.

Reported-by: Gerhard Wiesinger <lists@wiesinger.com>
Tested-by: Gerhard Wiesinger <lists@wiesinger.com>
Tested-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    |  2 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h   | 17 +++-----
 net/netfilter/ipset/ip_set_bitmap_ip.c    | 14 ++-----
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 ++++++++++++++-----------------
 net/netfilter/ipset/ip_set_bitmap_port.c  | 18 ++++-----
 net/netfilter/ipset/ip_set_core.c         | 14 ++++---
 net/netfilter/ipset/ip_set_hash_gen.h     | 11 ++++--
 net/netfilter/ipset/ip_set_list_set.c     |  5 ++-
 8 files changed, 65 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 48bb01edcf30..0e1f433cc4b7 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -421,7 +421,7 @@ extern void ip_set_free(void *members);
 extern int ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr);
 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
 extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
-			      size_t len);
+			      size_t len, size_t align);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (set->dsize) {
-		if (set->extensions & IPSET_EXT_DESTROY)
-			mtype_ext_cleanup(set);
-		ip_set_free(map->extensions);
-	}
-	kfree(map);
+	if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
+	ip_set_free(map);
 
 	set->data = NULL;
 }
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
+	size_t memsize = sizeof(*map) + map->memsize;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) +
-				map->memsize +
-				set->dsize * map->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
 /* Type structure */
 struct bitmap_ip {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
 	size_t memsize;		/* members size */
 	u8 netmask;		/* subnet netmask */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	pr_debug("hosts %u, elements %llu\n",
 		 hosts, (unsigned long long)elements);
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_ip(set, map, first_ip, last_ip,
 			 elements, hosts, netmask)) {
 		kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
 /* Type structure */
 struct bitmap_ipmac {
 	void *members;		/* the set members */
-	void *extensions;	/* MAC + data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collector */
+	unsigned char extensions[0]	/* MAC + data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
 struct bitmap_ipmac_adt_elem {
+	unsigned char ether[ETH_ALEN] __aligned(2);
 	u16 id;
-	unsigned char *ether;
+	u16 add_mac;
 };
 
 struct bitmap_ipmac_elem {
 	unsigned char ether[ETH_ALEN];
 	unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
 
 static inline u32
 ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
 	return ip - m->first_ip;
 }
 
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
-	return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize)		\
+	(struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize)	\
+	(const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
 
 /* Common functions */
 
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, dsize);
-	if (elem->filled == MAC_FILLED)
-		return !e->ether ||
-		       ether_addr_equal(e->ether, elem->ether);
+	elem = get_const_elem(map->extensions, e->id, dsize);
+	if (e->add_mac && elem->filled == MAC_FILLED)
+		return ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
 }
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, dsize);
+	elem = get_const_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 		 * and we can reuse it later when MAC is filled out,
 		 * possibly by the kernel
 		 */
-		if (e->ether)
+		if (e->add_mac)
 			ip_set_timeout_set(timeout, t);
 		else
 			*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
-			if (e->ether &&
+			if (e->add_mac &&
 			    (flags & IPSET_FLAG_EXIST) &&
 			    !ether_addr_equal(e->ether, elem->ether)) {
 				/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 				ether_addr_copy(elem->ether, e->ether);
 			}
 			return IPSET_ADD_FAILED;
-		} else if (!e->ether)
+		} else if (!e->add_mac)
 			/* Already added without ethernet address */
 			return IPSET_ADD_FAILED;
 		/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
-	} else if (e->ether) {
+	} else if (e->add_mac) {
 		/* We can store MAC too */
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
 		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
+		get_const_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct bitmap_ipmac_adt_elem e = { .id = 0 };
+	struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return -EINVAL;
 
 	e.id = ip_to_id(map, ip);
-	e.ether = eth_hdr(skb)->h_source;
+	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
 
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 		return -IPSET_ERR_BITMAP_RANGE;
 
 	e.id = ip_to_id(map, ip);
-	if (tb[IPSET_ATTR_ETHER])
-		e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
-	else
-		e.ether = NULL;
-
+	if (tb[IPSET_ATTR_ETHER]) {
+		memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+		e.add_mac = 1;
+	}
 	ret = adtfn(set, &e, &ext, &ext, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
 		return -IPSET_ERR_BITMAP_RANGE_SIZE;
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem),
+				     __alignof__(struct bitmap_ipmac_elem));
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	set->dsize = ip_set_elem_len(set, tb,
-				     sizeof(struct bitmap_ipmac_elem));
 	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
 /* Type structure */
 struct bitmap_port {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u16 first_port;		/* host byte order, included in range */
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * map->elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_port = first_port;
 	map->last_port = last_port;
 	set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
+	u32 elements;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		last_port = tmp;
 	}
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	elements = last_port - first_port + 1;
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
-	map->elements = last_port - first_port + 1;
+	map->elements = elements;
 	map->memsize = bitmap_bytes(0, map->elements);
 	set->variant = &bitmap_port;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_port(set, map, first_port, last_port)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
 }
 
 size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+		size_t align)
 {
 	enum ip_set_ext_id id;
-	size_t offset = len;
 	u32 cadt_flags = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
 		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+	if (!align)
+		align = 1;
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
-		offset = ALIGN(offset, ip_set_extensions[id].align);
-		set->offset[id] = offset;
+		len = ALIGN(len, ip_set_extensions[id].align);
+		set->offset[id] = len;
 		set->extensions |= ip_set_extensions[id].type;
-		offset += ip_set_extensions[id].len;
+		len += ip_set_extensions[id].len;
 	}
-	return offset;
+	return ALIGN(len, align);
 }
 EXPORT_SYMBOL_GPL(ip_set_elem_len);
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..4ff22194ce55 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
 	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
 	u8 size;		/* size of the array */
 	u8 pos;			/* position of the first free entry */
-	unsigned char value[0];	/* the array of the values */
-} __attribute__ ((aligned));
+	unsigned char value[0]	/* the array of the values */
+		__aligned(__alignof__(u64));
+};
 
 /* The hash table: the table size stored here in order to make resizing easy */
 struct htable {
@@ -1323,12 +1324,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 #endif
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
 #ifndef IP_SET_PROTO_UNDEF
 	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
 	}
 #endif
 	if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
 	struct rcu_head rcu;
 	struct list_head list;
 	ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
 
 struct set_adt_elem {
 	ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		size = IP_SET_LIST_MIN_SIZE;
 
 	set->variant = &set_variant;
-	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+				     __alignof__(struct set_elem));
 	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
-- 
cgit v1.2.3


From dece16353ef47d8d33f5302bc158072a9d65e26f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Nov 2015 10:41:16 -0700
Subject: block: change ->make_request_fn() and users to return a queue cookie

No functional changes in this patch, but it prepares us for returning
a more useful cookie related to the IO that was queued up.

Signed-off-by: Jens Axboe <axboe@fb.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
---
 arch/m68k/emu/nfblock.c                     |  3 ++-
 arch/powerpc/sysdev/axonram.c               |  5 +++--
 arch/xtensa/platforms/iss/simdisk.c         |  3 ++-
 block/blk-core.c                            | 26 ++++++++++++++++----------
 block/blk-mq.c                              | 26 ++++++++++++++------------
 drivers/block/brd.c                         |  5 +++--
 drivers/block/drbd/drbd_int.h               |  2 +-
 drivers/block/drbd/drbd_req.c               |  3 ++-
 drivers/block/null_blk.c                    |  3 ++-
 drivers/block/pktcdvd.c                     |  9 ++++-----
 drivers/block/ps3vram.c                     |  6 ++++--
 drivers/block/rsxx/dev.c                    |  5 +++--
 drivers/block/umem.c                        |  4 ++--
 drivers/block/zram/zram_drv.c               |  5 +++--
 drivers/lightnvm/rrpc.c                     |  9 +++++----
 drivers/md/bcache/request.c                 | 11 ++++++++---
 drivers/md/dm.c                             |  6 +++---
 drivers/md/md.c                             |  8 +++++---
 drivers/nvdimm/blk.c                        |  3 ++-
 drivers/nvdimm/btt.c                        |  3 ++-
 drivers/nvdimm/pmem.c                       |  3 ++-
 drivers/s390/block/dcssblk.c                |  8 +++++---
 drivers/s390/block/xpram.c                  |  5 +++--
 drivers/staging/lustre/lustre/llite/lloop.c |  5 +++--
 include/linux/blk_types.h                   | 24 ++++++++++++++++++++++++
 include/linux/blkdev.h                      |  4 ++--
 include/linux/fs.h                          |  2 +-
 include/linux/lightnvm.h                    |  2 +-
 28 files changed, 127 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index f2a00c591bf7..e9110b9b8bcd 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -59,7 +59,7 @@ struct nfhd_device {
 	struct gendisk *disk;
 };
 
-static void nfhd_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
 {
 	struct nfhd_device *dev = queue->queuedata;
 	struct bio_vec bvec;
@@ -77,6 +77,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio)
 		sec += len;
 	}
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d2b79bc336c1..7a399b4d60a0 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -103,7 +103,7 @@ axon_ram_irq_handler(int irq, void *dev)
  * axon_ram_make_request - make_request() method for block device
  * @queue, @bio: see blk_queue_make_request()
  */
-static void
+static blk_qc_t
 axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 {
 	struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
@@ -120,7 +120,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 	bio_for_each_segment(vec, bio, iter) {
 		if (unlikely(phys_mem + vec.bv_len > phys_end)) {
 			bio_io_error(bio);
-			return;
+			return BLK_QC_T_NONE;
 		}
 
 		user_mem = page_address(vec.bv_page) + vec.bv_offset;
@@ -133,6 +133,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 		transfered += vec.bv_len;
 	}
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 /**
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index fa84ca990caa..3c3ace2c46b6 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -101,7 +101,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
 	spin_unlock(&dev->lock);
 }
 
-static void simdisk_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct simdisk *dev = q->queuedata;
 	struct bio_vec bvec;
@@ -119,6 +119,7 @@ static void simdisk_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int simdisk_open(struct block_device *bdev, fmode_t mode)
diff --git a/block/blk-core.c b/block/blk-core.c
index 89eec7965870..e93df6d386a0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -809,7 +809,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
@@ -1678,7 +1678,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
 	struct blk_plug *plug;
@@ -1698,7 +1698,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio->bi_error = -EIO;
 		bio_endio(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
@@ -1713,7 +1713,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	if (!blk_queue_nomerges(q)) {
 		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
-			return;
+			return BLK_QC_T_NONE;
 	} else
 		request_count = blk_plug_queued_count(q);
 
@@ -1791,6 +1791,8 @@ get_rq:
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
+
+	return BLK_QC_T_NONE;
 }
 
 /*
@@ -1996,12 +1998,13 @@ end_io:
  * a lower device by calling into generic_make_request recursively, which
  * means the bio should NOT be touched after the call to ->make_request_fn.
  */
-void generic_make_request(struct bio *bio)
+blk_qc_t generic_make_request(struct bio *bio)
 {
 	struct bio_list bio_list_on_stack;
+	blk_qc_t ret = BLK_QC_T_NONE;
 
 	if (!generic_make_request_checks(bio))
-		return;
+		goto out;
 
 	/*
 	 * We only want one ->make_request_fn to be active at a time, else
@@ -2015,7 +2018,7 @@ void generic_make_request(struct bio *bio)
 	 */
 	if (current->bio_list) {
 		bio_list_add(current->bio_list, bio);
-		return;
+		goto out;
 	}
 
 	/* following loop may be a bit non-obvious, and so deserves some
@@ -2040,7 +2043,7 @@ void generic_make_request(struct bio *bio)
 
 		if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) {
 
-			q->make_request_fn(q, bio);
+			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
 
@@ -2053,6 +2056,9 @@ void generic_make_request(struct bio *bio)
 		}
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
+
+out:
+	return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -2066,7 +2072,7 @@ EXPORT_SYMBOL(generic_make_request);
  * interfaces; @bio must be presetup and ready for I/O.
  *
  */
-void submit_bio(int rw, struct bio *bio)
+blk_qc_t submit_bio(int rw, struct bio *bio)
 {
 	bio->bi_rw |= rw;
 
@@ -2100,7 +2106,7 @@ void submit_bio(int rw, struct bio *bio)
 		}
 	}
 
-	generic_make_request(bio);
+	return generic_make_request(bio);
 }
 EXPORT_SYMBOL(submit_bio);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c27b3eaef64..65f43bd696a0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1235,7 +1235,7 @@ static int blk_mq_direct_issue_request(struct request *rq)
  * but will attempt to bypass the hctx queueing if we can go straight to
  * hardware for SYNC IO.
  */
-static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = rw_is_sync(bio->bi_rw);
 	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
@@ -1249,7 +1249,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	blk_queue_split(q, &bio, q->bio_split);
@@ -1257,13 +1257,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	if (!is_flush_fua && !blk_queue_nomerges(q)) {
 		if (blk_attempt_plug_merge(q, bio, &request_count,
 					   &same_queue_rq))
-			return;
+			return BLK_QC_T_NONE;
 	} else
 		request_count = blk_plug_queued_count(q);
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
-		return;
+		return BLK_QC_T_NONE;
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
@@ -1302,11 +1302,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 			old_rq = rq;
 		blk_mq_put_ctx(data.ctx);
 		if (!old_rq)
-			return;
+			return BLK_QC_T_NONE;
 		if (!blk_mq_direct_issue_request(old_rq))
-			return;
+			return BLK_QC_T_NONE;
 		blk_mq_insert_request(old_rq, false, true, true);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1320,13 +1320,14 @@ run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 	blk_mq_put_ctx(data.ctx);
+	return BLK_QC_T_NONE;
 }
 
 /*
  * Single hardware queue variant. This will attempt to use any per-process
  * plug for merging and IO deferral.
  */
-static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = rw_is_sync(bio->bi_rw);
 	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
@@ -1339,18 +1340,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	blk_queue_split(q, &bio, q->bio_split);
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, NULL))
-		return;
+		return BLK_QC_T_NONE;
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
-		return;
+		return BLK_QC_T_NONE;
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
@@ -1374,7 +1375,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		}
 		list_add_tail(&rq->queuelist, &plug->mq_list);
 		blk_mq_put_ctx(data.ctx);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1389,6 +1390,7 @@ run_queue:
 	}
 
 	blk_mq_put_ctx(data.ctx);
+	return BLK_QC_T_NONE;
 }
 
 /*
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b9794aeeb878..c9f9c30d6467 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -323,7 +323,7 @@ out:
 	return err;
 }
 
-static void brd_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 	struct brd_device *brd = bdev->bd_disk->private_data;
@@ -358,9 +358,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
 
 out:
 	bio_endio(bio);
-	return;
+	return BLK_QC_T_NONE;
 io_error:
 	bio_io_error(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int brd_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 015c6e91b756..e66d453a5f2b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1448,7 +1448,7 @@ extern int proc_details;
 /* drbd_req */
 extern void do_submit(struct work_struct *ws);
 extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
-extern void drbd_make_request(struct request_queue *q, struct bio *bio);
+extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
 extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 211592682169..3ae2c0086563 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1494,7 +1494,7 @@ void do_submit(struct work_struct *ws)
 	}
 }
 
-void drbd_make_request(struct request_queue *q, struct bio *bio)
+blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
 	unsigned long start_jif;
@@ -1510,6 +1510,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
 
 	inc_ap_bio(device);
 	__drbd_make_request(device, bio, start_jif);
+	return BLK_QC_T_NONE;
 }
 
 void request_timer_fn(unsigned long data)
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 1c9e4fe5aa44..6255d1c4bba4 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -321,7 +321,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
 	return &nullb->queues[index];
 }
 
-static void null_queue_bio(struct request_queue *q, struct bio *bio)
+static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	struct nullb *nullb = q->queuedata;
 	struct nullb_queue *nq = nullb_to_queue(nullb);
@@ -331,6 +331,7 @@ static void null_queue_bio(struct request_queue *q, struct bio *bio)
 	cmd->bio = bio;
 
 	null_handle_cmd(cmd);
+	return BLK_QC_T_NONE;
 }
 
 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 7be2375db7f2..a7f4abcedee1 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2441,7 +2441,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
 	}
 }
 
-static void pkt_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct pktcdvd_device *pd;
 	char b[BDEVNAME_SIZE];
@@ -2467,7 +2467,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 	 */
 	if (bio_data_dir(bio) == READ) {
 		pkt_make_request_read(pd, bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
@@ -2499,13 +2499,12 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 		pkt_make_request_write(q, split);
 	} while (split != bio);
 
-	return;
+	return BLK_QC_T_NONE;
 end_io:
 	bio_io_error(bio);
+	return BLK_QC_T_NONE;
 }
 
-
-
 static void pkt_init_queue(struct pktcdvd_device *pd)
 {
 	struct request_queue *q = pd->disk->queue;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index d89fcac59515..56847fcda086 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -598,7 +598,7 @@ out:
 	return next;
 }
 
-static void ps3vram_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct ps3_system_bus_device *dev = q->queuedata;
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -614,11 +614,13 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio)
 	spin_unlock_irq(&priv->lock);
 
 	if (busy)
-		return;
+		return BLK_QC_T_NONE;
 
 	do {
 		bio = ps3vram_do_bio(dev, bio);
 	} while (bio);
+
+	return BLK_QC_T_NONE;
 }
 
 static int ps3vram_probe(struct ps3_system_bus_device *dev)
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 3163e4cdc2cc..e1b8b7061d2f 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -145,7 +145,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 	}
 }
 
-static void rsxx_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct rsxx_cardinfo *card = q->queuedata;
 	struct rsxx_bio_meta *bio_meta;
@@ -199,7 +199,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 	if (st)
 		goto queue_err;
 
-	return;
+	return BLK_QC_T_NONE;
 
 queue_err:
 	kmem_cache_free(bio_meta_pool, bio_meta);
@@ -207,6 +207,7 @@ req_err:
 	if (st)
 		bio->bi_error = st;
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 /*----------------- Device Setup -------------------*/
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 04d65790a886..7939b9f87441 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -524,7 +524,7 @@ static int mm_check_plugged(struct cardinfo *card)
 	return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
 }
 
-static void mm_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct cardinfo *card = q->queuedata;
 	pr_debug("mm_make_request %llu %u\n",
@@ -541,7 +541,7 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
 		activate(card);
 	spin_unlock_irq(&card->lock);
 
-	return;
+	return BLK_QC_T_NONE;
 }
 
 static irqreturn_t mm_interrupt(int irq, void *__card)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9fa15bb9d118..4c99b6ba8681 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -894,7 +894,7 @@ out:
 /*
  * Handler function for all zram I/O requests.
  */
-static void zram_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
 {
 	struct zram *zram = queue->queuedata;
 
@@ -911,11 +911,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
 
 	__zram_make_request(zram, bio);
 	zram_meta_put(zram);
-	return;
+	return BLK_QC_T_NONE;
 put_zram:
 	zram_meta_put(zram);
 error:
 	bio_io_error(bio);
+	return BLK_QC_T_NONE;
 }
 
 static void zram_slot_free_notify(struct block_device *bdev,
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 64a888a5e9b3..7ba64c87ba1c 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -803,7 +803,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 	return NVM_IO_OK;
 }
 
-static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
+static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 {
 	struct rrpc *rrpc = q->queuedata;
 	struct nvm_rq *rqd;
@@ -811,21 +811,21 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
 
 	if (bio->bi_rw & REQ_DISCARD) {
 		rrpc_discard(rrpc, bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
 	if (!rqd) {
 		pr_err_ratelimited("rrpc: not able to queue bio.");
 		bio_io_error(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 	memset(rqd, 0, sizeof(struct nvm_rq));
 
 	err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
 	switch (err) {
 	case NVM_IO_OK:
-		return;
+		return BLK_QC_T_NONE;
 	case NVM_IO_ERR:
 		bio_io_error(bio);
 		break;
@@ -841,6 +841,7 @@ static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	}
 
 	mempool_free(rqd, rrpc->rq_pool);
+	return BLK_QC_T_NONE;
 }
 
 static void rrpc_requeue(struct work_struct *work)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 8e9877b04637..25fa8445bb24 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -958,7 +958,8 @@ static void cached_dev_nodata(struct closure *cl)
 
 /* Cached devices - read & write stuff */
 
-static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t cached_dev_make_request(struct request_queue *q,
+					struct bio *bio)
 {
 	struct search *s;
 	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
@@ -997,6 +998,8 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
 		else
 			generic_make_request(bio);
 	}
+
+	return BLK_QC_T_NONE;
 }
 
 static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
@@ -1070,7 +1073,8 @@ static void flash_dev_nodata(struct closure *cl)
 	continue_at(cl, search_free, NULL);
 }
 
-static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t flash_dev_make_request(struct request_queue *q,
+					     struct bio *bio)
 {
 	struct search *s;
 	struct closure *cl;
@@ -1093,7 +1097,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
 		continue_at_nobarrier(&s->cl,
 				      flash_dev_nodata,
 				      bcache_wq);
-		return;
+		return BLK_QC_T_NONE;
 	} else if (rw) {
 		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
 					&KEY(d->id, bio->bi_iter.bi_sector, 0),
@@ -1109,6 +1113,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	continue_at(cl, search_free, NULL);
+	return BLK_QC_T_NONE;
 }
 
 static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 32440ad5f684..6e15f3565892 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1755,7 +1755,7 @@ static void __split_and_process_bio(struct mapped_device *md,
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
  */
-static void dm_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
 {
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
@@ -1774,12 +1774,12 @@ static void dm_make_request(struct request_queue *q, struct bio *bio)
 			queue_io(md, bio);
 		else
 			bio_io_error(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 
 	__split_and_process_bio(md, map, bio);
 	dm_put_live_table(md, srcu_idx);
-	return;
+	return BLK_QC_T_NONE;
 }
 
 int dm_request_based(struct mapped_device *md)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3f9a514b5b9d..807095f4c793 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -250,7 +250,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
-static void md_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
 	struct mddev *mddev = q->queuedata;
@@ -262,13 +262,13 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
 	if (mddev == NULL || mddev->pers == NULL
 	    || !mddev->ready) {
 		bio_io_error(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
 			bio->bi_error = -EROFS;
 		bio_endio(bio);
-		return;
+		return BLK_QC_T_NONE;
 	}
 	smp_rmb(); /* Ensure implications of  'active' are visible */
 	rcu_read_lock();
@@ -302,6 +302,8 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
 
 	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
 		wake_up(&mddev->sb_wait);
+
+	return BLK_QC_T_NONE;
 }
 
 /* mddev_suspend makes sure no new requests are submitted
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 0df77cb07df6..91a336ea8c4f 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -161,7 +161,7 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
 	return err;
 }
 
-static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 	struct gendisk *disk = bdev->bd_disk;
@@ -208,6 +208,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
 
  out:
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index eae93ab8ffcd..efb2c1ceef98 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1150,7 +1150,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
 	return ret;
 }
 
-static void btt_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct btt *btt = q->queuedata;
@@ -1198,6 +1198,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio)
 
 out:
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int btt_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 0ba6a978f227..3963b7533b65 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -64,7 +64,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	kunmap_atomic(mem);
 }
 
-static void pmem_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
 	bool do_acct;
 	unsigned long start;
@@ -84,6 +84,7 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio)
 		wmb_pmem();
 
 	bio_endio(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 5ed44fe21380..94a8f4ab57bc 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -27,7 +27,8 @@
 
 static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
-static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
+static blk_qc_t dcssblk_make_request(struct request_queue *q,
+						struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
 			 void __pmem **kaddr, unsigned long *pfn);
 
@@ -815,7 +816,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
 	up_write(&dcssblk_devices_sem);
 }
 
-static void
+static blk_qc_t
 dcssblk_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct dcssblk_dev_info *dev_info;
@@ -874,9 +875,10 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 		bytes_done += bvec.bv_len;
 	}
 	bio_endio(bio);
-	return;
+	return BLK_QC_T_NONE;
 fail:
 	bio_io_error(bio);
+	return BLK_QC_T_NONE;
 }
 
 static long
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 02871f1db562..288f59a4147b 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void)
 /*
  * Block device make request function.
  */
-static void xpram_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
 {
 	xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
 	struct bio_vec bvec;
@@ -223,9 +223,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio)
 		}
 	}
 	bio_endio(bio);
-	return;
+	return BLK_QC_T_NONE;
 fail:
 	bio_io_error(bio);
+	return BLK_QC_T_NONE;
 }
 
 static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index e6974c36276d..fed50d538a41 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -333,7 +333,7 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
 	return count;
 }
 
-static void loop_make_request(struct request_queue *q, struct bio *old_bio)
+static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio)
 {
 	struct lloop_device *lo = q->queuedata;
 	int rw = bio_rw(old_bio);
@@ -364,9 +364,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 		goto err;
 	}
 	loop_add_bio(lo, old_bio);
-	return;
+	return BLK_QC_T_NONE;
 err:
 	bio_io_error(old_bio);
+	return BLK_QC_T_NONE;
 }
 
 static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e8130138f29d..641e5a3ed58c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -244,4 +244,28 @@ enum rq_flag_bits {
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 #define REQ_NO_TIMEOUT		(1ULL << __REQ_NO_TIMEOUT)
 
+typedef unsigned int blk_qc_t;
+#define BLK_QC_T_NONE	-1U
+#define BLK_QC_T_SHIFT	16
+
+static inline bool blk_qc_t_valid(blk_qc_t cookie)
+{
+	return cookie != BLK_QC_T_NONE;
+}
+
+static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num)
+{
+	return tag | (queue_num << BLK_QC_T_SHIFT);
+}
+
+static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
+{
+	return cookie >> BLK_QC_T_SHIFT;
+}
+
+static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
+{
+	return cookie & 0xffff;
+}
+
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d045ca8487af..5ee0f5243025 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -209,7 +209,7 @@ static inline unsigned short req_get_ioprio(struct request *req)
 struct blk_queue_ctx;
 
 typedef void (request_fn_proc) (struct request_queue *q);
-typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
 
@@ -761,7 +761,7 @@ static inline void rq_flush_dcache_pages(struct request *rq)
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
-extern void generic_make_request(struct bio *bio);
+extern blk_qc_t generic_make_request(struct bio *bio);
 extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 72d8a844c692..bcca36e4bc1e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2625,7 +2625,7 @@ static inline void remove_inode_hash(struct inode *inode)
 extern void inode_sb_list_add(struct inode *inode);
 
 #ifdef CONFIG_BLOCK
-extern void submit_bio(int, struct bio *);
+extern blk_qc_t submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
 #endif
 extern int set_blocksize(struct block_device *, int);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 5ebd70d12f35..69c9057e1ab8 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -426,7 +426,7 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
 	return ppa;
 }
 
-typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
+typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
 typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
-- 
cgit v1.2.3


From 05229beeddf7e75e2e616ddaad4b70e7fca9528d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Nov 2015 10:44:55 -0700
Subject: block: add block polling support

Add basic support for polling for specific IO to complete. This uses
the cookie that blk-mq passes back, which enables the block layer
to pass this cookie to the driver to spin for a specific request.

This will be combined with request latency tracking, so we can make
qualified decisions about when to poll and when not to. For now, for
benchmark purposes, we add a sysfs file that controls whether polling
is enabled or not.

Signed-off-by: Jens Axboe <axboe@fb.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-core.c       | 41 +++++++++++++++++++++++++++++++++++++++++
 block/blk-mq-sysfs.c   | 10 ++++++++++
 block/blk-sysfs.c      | 35 +++++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h | 10 ++++++++++
 include/linux/blkdev.h |  3 +++
 5 files changed, 99 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index e93df6d386a0..fa36b4ff7d63 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3312,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+	struct blk_plug *plug;
+	long state;
+
+	if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
+	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		return false;
+
+	plug = current->plug;
+	if (plug)
+		blk_flush_plug_list(plug, false);
+
+	state = current->state;
+	while (!need_resched()) {
+		unsigned int queue_num = blk_qc_t_to_queue_num(cookie);
+		struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num];
+		int ret;
+
+		hctx->poll_invoked++;
+
+		ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
+		if (ret > 0) {
+			hctx->poll_success++;
+			set_current_state(TASK_RUNNING);
+			return true;
+		}
+
+		if (signal_pending_state(state, current))
+			set_current_state(TASK_RUNNING);
+
+		if (current->state == TASK_RUNNING)
+			return true;
+		if (ret < 0)
+			break;
+		cpu_relax();
+	}
+
+	return false;
+}
+
 #ifdef CONFIG_PM
 /**
  * blk_pm_runtime_init - Block layer runtime PM initialization routine
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 6f57a110289c..1cf18784c5cf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
 	return ret;
 }
 
+static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return sprintf(page, "invoked=%lu, success=%lu\n", hctx->poll_invoked, hctx->poll_success);
+}
+
 static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
 					   char *page)
 {
@@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
 	.attr = {.name = "cpu_list", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_cpus_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
+	.attr = {.name = "io_poll", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_poll_show,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_queued.attr,
@@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
 	&blk_mq_hw_sysfs_active.attr,
+	&blk_mq_hw_sysfs_poll.attr,
 	NULL,
 };
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 31849e328b45..565b8dac5782 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_poll_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
+}
+
+static ssize_t queue_poll_store(struct request_queue *q, const char *page,
+				size_t count)
+{
+	unsigned long poll_on;
+	ssize_t ret;
+
+	if (!q->mq_ops || !q->mq_ops->poll)
+		return -EINVAL;
+
+	ret = queue_var_store(&poll_on, page, count);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irq(q->queue_lock);
+	if (poll_on)
+		queue_flag_set(QUEUE_FLAG_POLL, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_POLL, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = {
 	.store = queue_store_random,
 };
 
+static struct queue_sysfs_entry queue_poll_entry = {
+	.attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_poll_show,
+	.store = queue_poll_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = {
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
 	&queue_random_entry.attr,
+	&queue_poll_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 83cc9d4e5455..daf17d70aeca 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -59,6 +59,9 @@ struct blk_mq_hw_ctx {
 
 	struct blk_mq_cpu_notifier	cpu_notifier;
 	struct kobject		kobj;
+
+	unsigned long		poll_invoked;
+	unsigned long		poll_success;
 };
 
 struct blk_mq_tag_set {
@@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int,
 typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
 		bool);
 typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
+typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
+
 
 struct blk_mq_ops {
 	/*
@@ -114,6 +119,11 @@ struct blk_mq_ops {
 	 */
 	timeout_fn		*timeout;
 
+	/*
+	 * Called to poll for completion of a specific tag.
+	 */
+	poll_fn			*poll;
+
 	softirq_done_fn		*complete;
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5ee0f5243025..3fe27f8d91f0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -487,6 +487,7 @@ struct request_queue {
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_POLL	       22	/* IO polling enabled if set */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue;	/* this is never NULL */
-- 
cgit v1.2.3


From 5037835c1f3eabf4f22163fc0278dd87165f8957 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Mon, 5 Oct 2015 16:33:36 -0600
Subject: coredump: add DAX filtering for ELF coredumps

Add two new flags to the existing coredump mechanism for ELF files to
allow us to explicitly filter DAX mappings.  This is desirable because
DAX mappings, like hugetlb mappings, have the potential to be very
large.

Update the coredump_filter documentation in
Documentation/filesystems/proc.txt so that it addresses the new DAX
coredump flags.  Also update the documented default value of
coredump_filter to be consistent with the core(5) man page.  The
documentation being updated talks about bit 4, Dump ELF headers, which
is enabled if CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is turned on in the
kernel config.  This kernel config option defaults to "y" if both ELF
binaries and coredump are enabled.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/filesystems/proc.txt | 22 ++++++++++++----------
 fs/binfmt_elf.c                    | 10 ++++++++++
 include/linux/sched.h              |  4 +++-
 3 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d411ca63c8b6..6f887cf873a1 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1598,16 +1598,16 @@ Documentation/accounting.
 ---------------------------------------------------------------
 When a process is dumped, all anonymous memory is written to a core file as
 long as the size of the core file isn't limited. But sometimes we don't want
-to dump some memory segments, for example, huge shared memory. Conversely,
-sometimes we want to save file-backed memory segments into a core file, not
-only the individual files.
+to dump some memory segments, for example, huge shared memory or DAX.
+Conversely, sometimes we want to save file-backed memory segments into a core
+file, not only the individual files.
 
 /proc/<pid>/coredump_filter allows you to customize which memory segments
 will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
 of memory types. If a bit of the bitmask is set, memory segments of the
 corresponding memory type are dumped, otherwise they are not dumped.
 
-The following 7 memory types are supported:
+The following 9 memory types are supported:
   - (bit 0) anonymous private memory
   - (bit 1) anonymous shared memory
   - (bit 2) file-backed private memory
@@ -1616,20 +1616,22 @@ The following 7 memory types are supported:
             effective only if the bit 2 is cleared)
   - (bit 5) hugetlb private memory
   - (bit 6) hugetlb shared memory
+  - (bit 7) DAX private memory
+  - (bit 8) DAX shared memory
 
   Note that MMIO pages such as frame buffer are never dumped and vDSO pages
   are always dumped regardless of the bitmask status.
 
-  Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only
-  effected by bit 5-6.
+  Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is
+  only affected by bits 5-6, and DAX is only affected by bits 7-8.
 
-Default value of coredump_filter is 0x23; this means all anonymous memory
-segments and hugetlb private memory are dumped.
+The default value of coredump_filter is 0x33; this means all anonymous memory
+segments, ELF header pages and hugetlb private memory are dumped.
 
 If you don't want to dump all shared memory segments attached to pid 1234,
-write 0x21 to the process's proc file.
+write 0x31 to the process's proc file.
 
-  $ echo 0x21 > /proc/1234/coredump_filter
+  $ echo 0x31 > /proc/1234/coredump_filter
 
 When a new process is created, the process inherits the bitmask status from its
 parent. It is useful to set up coredump_filter before the program runs.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6b659967898e..5f399ea1d20a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,7 @@
 #include <linux/utsname.h>
 #include <linux/coredump.h>
 #include <linux/sched.h>
+#include <linux/dax.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_DONTDUMP)
 		return 0;
 
+	/* support for DAX */
+	if (vma_is_dax(vma)) {
+		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
+			goto whole;
+		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
+			goto whole;
+		return 0;
+	}
+
 	/* Hugetlb memory check */
 	if (vma->vm_flags & VM_HUGETLB) {
 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b7b9501b41af..3c02d92ed23b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -483,9 +483,11 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_DUMP_ELF_HEADERS	6
 #define MMF_DUMP_HUGETLB_PRIVATE 7
 #define MMF_DUMP_HUGETLB_SHARED  8
+#define MMF_DUMP_DAX_PRIVATE	9
+#define MMF_DUMP_DAX_SHARED	10
 
 #define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	7
+#define MMF_DUMP_FILTER_BITS	9
 #define MMF_DUMP_FILTER_MASK \
 	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \
-- 
cgit v1.2.3


From c8299cb605b27dd5a49f7a69e48fd23e5a206298 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Mon, 9 Nov 2015 14:58:10 -0800
Subject: kernel.h: make abs() work with 64-bit types

For 64-bit arguments, the abs macro casts it to an int which leads to
lost precision and may cause incorrect results.  To deal with 64-bit
types abs64 macro has been introduced but still there are places where
abs macro is used incorrectly.

To deal with the problem, expand abs macro such that it operates on s64
type when dealing with 64-bit types while still returning long when
dealing with smaller types.

This fixes one known bug (per John):

The internal clocksteering done for fine-grained error correction uses a
: logarithmic approximation, so any time adjtimex() adjusts the clock
: steering, timekeeping_freqadjust() quickly approximates the correct clock
: frequency over a series of ticks.
:
: Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit
: dc491596f639438 (Rework frequency adjustments to work better w/ nohz),
: used the abs() function with a s64 error value to calculate the size of
: the approximated adjustment to be made.
:
: Per include/linux/kernel.h: "abs() should not be used for 64-bit types
: (s64, u64, long long) - use abs64()".
:
: Thus on 32-bit platforms, this resulted in the clocksteering to take a
: quite dampened random walk trying to converge on the proper frequency,
: which caused the adjustments to be made much slower than intended (most
: easily observed when large adjustments are made).

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2c13f747ac2e..05ce782d53ab 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -200,28 +200,31 @@ extern int _cond_resched(void);
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
 
-/*
- * abs() handles unsigned and signed longs, ints, shorts and chars.  For all
- * input types abs() returns a signed long.
- * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()
- * for those.
+/**
+ * abs - return absolute value of an argument
+ * @x: the value.  If it is unsigned type, it is converted to signed type first
+ *   (s64, long or int depending on its size).
+ *
+ * Return: an absolute value of x.  If x is 64-bit, macro's return type is s64,
+ *   otherwise it is signed long.
  */
-#define abs(x) ({						\
-		long ret;					\
-		if (sizeof(x) == sizeof(long)) {		\
-			long __x = (x);				\
-			ret = (__x < 0) ? -__x : __x;		\
-		} else {					\
-			int __x = (x);				\
-			ret = (__x < 0) ? -__x : __x;		\
-		}						\
-		ret;						\
-	})
-
-#define abs64(x) ({				\
-		s64 __x = (x);			\
-		(__x < 0) ? -__x : __x;		\
-	})
+#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({	\
+		s64 __x = (x);						\
+		(__x < 0) ? -__x : __x;					\
+	}), ({								\
+		long ret;						\
+		if (sizeof(x) == sizeof(long)) {			\
+			long __x = (x);					\
+			ret = (__x < 0) ? -__x : __x;			\
+		} else {						\
+			int __x = (x);					\
+			ret = (__x < 0) ? -__x : __x;			\
+		}							\
+		ret;							\
+	}))
+
+/* Deprecated, use abs instead. */
+#define abs64(x) abs((s64)(x))
 
 /**
  * reciprocal_scale - "scale" a value into range [0, ep_ro)
-- 
cgit v1.2.3


From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 9 Nov 2015 14:58:13 -0800
Subject: remove abs64()

Switch everything to the new and more capable implementation of abs().
Mainly to give the new abs() a bit of a workout.

Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_irq.c             | 4 ++--
 drivers/gpu/drm/tegra/sor.c           | 4 ++--
 drivers/input/joystick/walkera0701.c  | 4 ++--
 drivers/media/i2c/ov9650.c            | 2 +-
 drivers/net/wireless/mac80211_hwsim.c | 2 +-
 drivers/thermal/power_allocator.c     | 2 +-
 fs/ext4/mballoc.c                     | 4 ++--
 fs/gfs2/lock_dlm.c                    | 2 +-
 include/linux/kernel.h                | 3 ---
 kernel/time/clocksource.c             | 2 +-
 kernel/time/timekeeping.c             | 2 +-
 lib/div64.c                           | 2 +-
 net/sctp/transport.c                  | 2 +-
 13 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 22d207e211e7..22d8b78d537e 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -261,7 +261,7 @@ static void vblank_disable_and_save(struct drm_device *dev, unsigned int pipe)
 	 * available. In that case we can't account for this and just
 	 * hope for the best.
 	 */
-	if (vblrc && (abs64(diff_ns) > 1000000))
+	if (vblrc && (abs(diff_ns) > 1000000))
 		store_vblank(dev, pipe, 1, &tvblank);
 
 	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
@@ -1772,7 +1772,7 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
 	 * e.g., due to spurious vblank interrupts. We need to
 	 * ignore those for accounting.
 	 */
-	if (abs64(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS)
+	if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS)
 		store_vblank(dev, pipe, 1, &tvblank);
 	else
 		DRM_DEBUG("crtc %u: Redundant vblirq ignored. diff_ns = %d\n",
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index da1715ebdd71..3eff7cf75d25 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -555,11 +555,11 @@ static int tegra_sor_compute_params(struct tegra_sor *sor,
 	error = div_s64(active_sym - approx, tu_size);
 	error *= params->num_clocks;
 
-	if (error <= 0 && abs64(error) < params->error) {
+	if (error <= 0 && abs(error) < params->error) {
 		params->active_count = div_u64(active_count, f);
 		params->active_polarity = active_polarity;
 		params->active_frac = active_frac;
-		params->error = abs64(error);
+		params->error = abs(error);
 		params->tu_size = tu_size;
 
 		if (error == 0)
diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c
index d88f5dd3c9d9..9c07fe911075 100644
--- a/drivers/input/joystick/walkera0701.c
+++ b/drivers/input/joystick/walkera0701.c
@@ -150,7 +150,7 @@ static void walkera0701_irq_handler(void *handler_data)
 		if (w->counter == 24) {	/* full frame */
 			walkera0701_parse_frame(w);
 			w->counter = NO_SYNC;
-			if (abs64(pulse_time - SYNC_PULSE) < RESERVE)	/* new frame sync */
+			if (abs(pulse_time - SYNC_PULSE) < RESERVE)	/* new frame sync */
 				w->counter = 0;
 		} else {
 			if ((pulse_time > (ANALOG_MIN_PULSE - RESERVE)
@@ -161,7 +161,7 @@ static void walkera0701_irq_handler(void *handler_data)
 			} else
 				w->counter = NO_SYNC;
 		}
-	} else if (abs64(pulse_time - SYNC_PULSE - BIN0_PULSE) <
+	} else if (abs(pulse_time - SYNC_PULSE - BIN0_PULSE) <
 				RESERVE + BIN1_PULSE - BIN0_PULSE)	/* frame sync .. */
 		w->counter = 0;
 
diff --git a/drivers/media/i2c/ov9650.c b/drivers/media/i2c/ov9650.c
index e691bba1945b..1ee6a5527c38 100644
--- a/drivers/media/i2c/ov9650.c
+++ b/drivers/media/i2c/ov9650.c
@@ -1133,7 +1133,7 @@ static int __ov965x_set_frame_interval(struct ov965x *ov965x,
 		if (mbus_fmt->width != iv->size.width ||
 		    mbus_fmt->height != iv->size.height)
 			continue;
-		err = abs64((u64)(iv->interval.numerator * 10000) /
+		err = abs((u64)(iv->interval.numerator * 10000) /
 			    iv->interval.denominator - req_int);
 		if (err < min_err) {
 			fiv = iv;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index ee46f4647fbc..c00a7daaa4bc 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -787,7 +787,7 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw,
 	struct mac80211_hwsim_data *data = hw->priv;
 	u64 now = mac80211_hwsim_get_tsf(hw, vif);
 	u32 bcn_int = data->beacon_int;
-	u64 delta = abs64(tsf - now);
+	u64 delta = abs(tsf - now);
 
 	/* adjust after beaconing with new timestamp at old TBTT */
 	if (tsf > now) {
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index e570ff084add..f0fbea386869 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -228,7 +228,7 @@ static u32 pid_controller(struct thermal_zone_device *tz,
 	if (err < int_to_frac(tz->tzp->integral_cutoff)) {
 		s64 i_next = i + mul_frac(tz->tzp->k_i, err);
 
-		if (abs64(i_next) < max_power_frac) {
+		if (abs(i_next) < max_power_frac) {
 			i = i_next;
 			params->err_integral += err;
 		}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b4b3c1f91814..61eaf74dca37 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3333,8 +3333,8 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
 		atomic_inc(&pa->pa_count);
 		return pa;
 	}
-	cur_distance = abs64(goal_block - cpa->pa_pstart);
-	new_distance = abs64(goal_block - pa->pa_pstart);
+	cur_distance = abs(goal_block - cpa->pa_pstart);
+	new_distance = abs(goal_block - pa->pa_pstart);
 
 	if (cur_distance <= new_distance)
 		return cpa;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 284c1542783e..8b907c5cc913 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -50,7 +50,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
 	s64 delta = sample - s->stats[index];
 	s->stats[index] += (delta >> 3);
 	index++;
-	s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
+	s->stats[index] += ((abs(delta) - s->stats[index]) >> 2);
 }
 
 /**
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 05ce782d53ab..350dfb08aee3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -223,9 +223,6 @@ extern int _cond_resched(void);
 		ret;							\
 	}))
 
-/* Deprecated, use abs instead. */
-#define abs64(x) abs((s64)(x))
-
 /**
  * reciprocal_scale - "scale" a value into range [0, ep_ro)
  * @val: value
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0d8fe8b8f727..1347882d131e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b1356b7ae570..d563c1960302 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
 	negative = (tick_error < 0);
 
 	/* Sort out the magnitude of the correction */
-	tick_error = abs64(tick_error);
+	tick_error = abs(tick_error);
 	for (adj = 0; tick_error > interval; adj++)
 		tick_error >>= 1;
 
diff --git a/lib/div64.c b/lib/div64.c
index 19ea7ed4b948..62a698a432bc 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -162,7 +162,7 @@ s64 div64_s64(s64 dividend, s64 divisor)
 {
 	s64 quot, t;
 
-	quot = div64_u64(abs64(dividend), abs64(divisor));
+	quot = div64_u64(abs(dividend), abs(divisor));
 	t = (dividend ^ divisor) >> 63;
 
 	return (quot ^ t) - t;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a0a431824f63..aab9e3f29755 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -331,7 +331,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 		 * 1/8, rto_alpha would be expressed as 3.
 		 */
 		tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
-			+ (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
+			+ (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
 		tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
 			+ (rtt >> net->sctp.rto_alpha);
 	} else {
-- 
cgit v1.2.3


From 77c5b5da02f0a30d61144a546c4ef3657e3b817d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 9 Nov 2015 14:58:23 -0800
Subject: kmap_atomic_to_page() has no users, remove it

Removal started in commit 5bbeed12bdc3 ("sparc32: drop unused
kmap_atomic_to_page").  Let's do it across the whole tree.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/highmem.h        |  1 -
 arch/arm/mm/highmem.c                 | 10 ----------
 arch/frv/include/asm/highmem.h        |  2 --
 arch/frv/mm/highmem.c                 |  5 -----
 arch/metag/include/asm/highmem.h      |  1 -
 arch/metag/mm/highmem.c               | 14 --------------
 arch/microblaze/include/asm/highmem.h | 13 -------------
 arch/mips/include/asm/highmem.h       |  1 -
 arch/mips/mm/highmem.c                | 13 -------------
 arch/parisc/include/asm/cacheflush.h  |  1 -
 arch/powerpc/include/asm/highmem.h    | 13 -------------
 arch/tile/include/asm/highmem.h       |  1 -
 arch/tile/mm/highmem.c                | 12 ------------
 arch/x86/include/asm/highmem.h        |  1 -
 arch/x86/mm/highmem_32.c              | 14 --------------
 include/linux/highmem.h               |  1 -
 16 files changed, 103 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 535579511ed0..0a0e2d1784c0 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -68,7 +68,6 @@ extern void kunmap(struct page *page);
 extern void *kmap_atomic(struct page *page);
 extern void __kunmap_atomic(void *kvaddr);
 extern void *kmap_atomic_pfn(unsigned long pfn);
-extern struct page *kmap_atomic_to_page(const void *ptr);
 #endif
 
 #endif
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 9df5f09585ca..d02f8187b1cc 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -147,13 +147,3 @@ void *kmap_atomic_pfn(unsigned long pfn)
 
 	return (void *)vaddr;
 }
-
-struct page *kmap_atomic_to_page(const void *ptr)
-{
-	unsigned long vaddr = (unsigned long)ptr;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	return pte_page(get_fixmap_pte(vaddr));
-}
diff --git a/arch/frv/include/asm/highmem.h b/arch/frv/include/asm/highmem.h
index b3adc93611f3..1f58938703ab 100644
--- a/arch/frv/include/asm/highmem.h
+++ b/arch/frv/include/asm/highmem.h
@@ -62,8 +62,6 @@ extern void kunmap_high(struct page *page);
 extern void *kmap(struct page *page);
 extern void kunmap(struct page *page);
 
-extern struct page *kmap_atomic_to_page(void *ptr);
-
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c
index 785344bbdc07..45750fb65c49 100644
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -32,11 +32,6 @@ void kunmap(struct page *page)
 
 EXPORT_SYMBOL(kunmap);
 
-struct page *kmap_atomic_to_page(void *ptr)
-{
-	return virt_to_page(ptr);
-}
-
 void *kmap_atomic(struct page *page)
 {
 	unsigned long paddr;
diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h
index 6646a15c73dd..9b1d172cd884 100644
--- a/arch/metag/include/asm/highmem.h
+++ b/arch/metag/include/asm/highmem.h
@@ -56,7 +56,6 @@ extern void kunmap(struct page *page);
 extern void *kmap_atomic(struct page *page);
 extern void __kunmap_atomic(void *kvaddr);
 extern void *kmap_atomic_pfn(unsigned long pfn);
-extern struct page *kmap_atomic_to_page(void *ptr);
 #endif
 
 #endif
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
index 807f1b1c4e65..f19a87f2c1ec 100644
--- a/arch/metag/mm/highmem.c
+++ b/arch/metag/mm/highmem.c
@@ -111,20 +111,6 @@ void *kmap_atomic_pfn(unsigned long pfn)
 	return (void *)vaddr;
 }
 
-struct page *kmap_atomic_to_page(void *ptr)
-{
-	unsigned long vaddr = (unsigned long)ptr;
-	int idx;
-	pte_t *pte;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	idx = virt_to_fix(vaddr);
-	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
-	return pte_page(*pte);
-}
-
 void __init kmap_init(void)
 {
 	unsigned long kmap_vstart;
diff --git a/arch/microblaze/include/asm/highmem.h b/arch/microblaze/include/asm/highmem.h
index d04638932438..67925ef18cfa 100644
--- a/arch/microblaze/include/asm/highmem.h
+++ b/arch/microblaze/include/asm/highmem.h
@@ -76,19 +76,6 @@ static inline void *kmap_atomic(struct page *page)
 	return kmap_atomic_prot(page, kmap_prot);
 }
 
-static inline struct page *kmap_atomic_to_page(void *ptr)
-{
-	unsigned long idx, vaddr = (unsigned long) ptr;
-	pte_t *pte;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	idx = virt_to_fix(vaddr);
-	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
-	return pte_page(*pte);
-}
-
 #define flush_cache_kmaps()	{ flush_icache(); flush_dcache(); }
 
 #endif /* __KERNEL__ */
diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h
index 572e63ec2a38..01880b34a209 100644
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -49,7 +49,6 @@ extern void kunmap(struct page *page);
 extern void *kmap_atomic(struct page *page);
 extern void __kunmap_atomic(void *kvaddr);
 extern void *kmap_atomic_pfn(unsigned long pfn);
-extern struct page *kmap_atomic_to_page(void *ptr);
 
 #define flush_cache_kmaps()	flush_cache_all()
 
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
index 11661cbc11a8..d7258a103439 100644
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -118,19 +118,6 @@ void *kmap_atomic_pfn(unsigned long pfn)
 	return (void*) vaddr;
 }
 
-struct page *kmap_atomic_to_page(void *ptr)
-{
-	unsigned long idx, vaddr = (unsigned long)ptr;
-	pte_t *pte;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	idx = virt_to_fix(vaddr);
-	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
-	return pte_page(*pte);
-}
-
 void __init kmap_init(void)
 {
 	unsigned long kmap_vstart;
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index ec2df4bab302..845272ce9cc5 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -156,7 +156,6 @@ static inline void __kunmap_atomic(void *addr)
 
 #define kmap_atomic_prot(page, prot)	kmap_atomic(page)
 #define kmap_atomic_pfn(pfn)	kmap_atomic(pfn_to_page(pfn))
-#define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
 
 #endif /* _PARISC_CACHEFLUSH_H */
 
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index caaf6e00630d..01c2c23b307e 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -84,19 +84,6 @@ static inline void *kmap_atomic(struct page *page)
 	return kmap_atomic_prot(page, kmap_prot);
 }
 
-static inline struct page *kmap_atomic_to_page(void *ptr)
-{
-	unsigned long idx, vaddr = (unsigned long) ptr;
-	pte_t *pte;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	idx = virt_to_fix(vaddr);
-	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
-	return pte_page(*pte);
-}
-
 
 #define flush_cache_kmaps()	flush_cache_all()
 
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index fc8429a31c85..979579b38e57 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -63,7 +63,6 @@ void *kmap_atomic(struct page *page);
 void __kunmap_atomic(void *kvaddr);
 void *kmap_atomic_pfn(unsigned long pfn);
 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
-struct page *kmap_atomic_to_page(void *ptr);
 void *kmap_atomic_prot(struct page *page, pgprot_t prot);
 void kmap_atomic_fix_kpte(struct page *page, int finished);
 
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index fcd545014e79..eca28551b22d 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -275,15 +275,3 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 {
 	return kmap_atomic_prot(pfn_to_page(pfn), prot);
 }
-
-struct page *kmap_atomic_to_page(void *ptr)
-{
-	pte_t *pte;
-	unsigned long vaddr = (unsigned long)ptr;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	pte = kmap_get_pte(vaddr);
-	return pte_page(*pte);
-}
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 04e9d023168f..1c0b43724ce3 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -68,7 +68,6 @@ void *kmap_atomic(struct page *page);
 void __kunmap_atomic(void *kvaddr);
 void *kmap_atomic_pfn(unsigned long pfn);
 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
-struct page *kmap_atomic_to_page(void *ptr);
 
 #define flush_cache_kmaps()	do { } while (0)
 
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index eecb207a2037..a6d739258137 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -104,20 +104,6 @@ void __kunmap_atomic(void *kvaddr)
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
-struct page *kmap_atomic_to_page(void *ptr)
-{
-	unsigned long idx, vaddr = (unsigned long)ptr;
-	pte_t *pte;
-
-	if (vaddr < FIXADDR_START)
-		return virt_to_page(ptr);
-
-	idx = virt_to_fix(vaddr);
-	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
-	return pte_page(*pte);
-}
-EXPORT_SYMBOL(kmap_atomic_to_page);
-
 void __init set_highmem_pages_init(void)
 {
 	struct zone *zone;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 6aefcd0031a6..bb3f3297062a 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -78,7 +78,6 @@ static inline void __kunmap_atomic(void *addr)
 }
 
 #define kmap_atomic_pfn(pfn)	kmap_atomic(pfn_to_page(pfn))
-#define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
 
 #define kmap_flush_unused()	do {} while(0)
 #endif
-- 
cgit v1.2.3


From 7bc4f1d281bc1f807fd0c9aaa2f2d333b6508790 Mon Sep 17 00:00:00 2001
From: Yaowei Bai <bywxiaobai@163.com>
Date: Mon, 9 Nov 2015 14:58:26 -0800
Subject: include/linux/kdev_t.h: remove unused huge_valid_dev()

There's no user of huge_valid_dev() any more, so remove it.

No functional change.

Signed-off-by: Yaowei Bai <bywxiaobai@163.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kdev_t.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index c838abe3ee0a..a546d206c7f3 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -54,11 +54,6 @@ static inline dev_t new_decode_dev(u32 dev)
 	return MKDEV(major, minor);
 }
 
-static inline int huge_valid_dev(dev_t dev)
-{
-	return 1;
-}
-
 static inline u64 huge_encode_dev(dev_t dev)
 {
 	return new_encode_dev(dev);
-- 
cgit v1.2.3


From 8b9758b9c6f65f55c94370636c04e976edc93e1a Mon Sep 17 00:00:00 2001
From: Yaowei Bai <bywxiaobai@163.com>
Date: Mon, 9 Nov 2015 14:58:28 -0800
Subject: include/linux/kdev_t.h: old/new_valid_dev() can return bool

Make old/new_valid_dev() return bool, since these two functions only
ever return one or zero.

No functional change.

Signed-off-by: Yaowei Bai <bywxiaobai@163.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kdev_t.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index a546d206c7f3..052c7b32cc91 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -20,7 +20,7 @@
 	})
 
 /* acceptable for old filesystems */
-static inline int old_valid_dev(dev_t dev)
+static inline bool old_valid_dev(dev_t dev)
 {
 	return MAJOR(dev) < 256 && MINOR(dev) < 256;
 }
@@ -35,7 +35,7 @@ static inline dev_t old_decode_dev(u16 val)
 	return MKDEV((val >> 8) & 255, val & 255);
 }
 
-static inline int new_valid_dev(dev_t dev)
+static inline bool new_valid_dev(dev_t dev)
 {
 	return 1;
 }
-- 
cgit v1.2.3


From 35181e86df97e4223f4a28fb33e2bcf3b73de141 Mon Sep 17 00:00:00 2001
From: Haozhong Zhang <haozhong.zhang@intel.com>
Date: Tue, 20 Oct 2015 15:39:03 +0800
Subject: KVM: x86: Add a common TSC scaling function

VMX and SVM calculate the TSC scaling ratio using similar logic, so this
patch generalizes it into a common TSC scaling function.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
[Inline the multiplication and shift steps into mul_u64_u64_shr.  Remove
 BUG_ON.  - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm.c              | 48 ++++----------------------------------
 arch/x86/kvm/x86.c              | 40 +++++++++++++++++++++++++++++++-
 include/linux/kvm_host.h        |  1 +
 include/linux/math64.h          | 51 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 97 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f3354bd92364..52d1419968eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1238,6 +1238,8 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c92e6f429d0..65f4f1947a62 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -212,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 				      bool has_error_code, u32 error_code);
-static u64 __scale_tsc(u64 ratio, u64 tsc);
 
 enum {
 	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -892,21 +891,7 @@ static __init int svm_hardware_setup(void)
 		kvm_enable_efer_bits(EFER_FFXSR);
 
 	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
-		u64 max;
-
 		kvm_has_tsc_control = true;
-
-		/*
-		 * Make sure the user can only configure tsc_khz values that
-		 * fit into a signed integer.
-		 * A min value is not calculated needed because it will always
-		 * be 1 on all machines and a value of 0 is used to disable
-		 * tsc-scaling for the vcpu.
-		 */
-		max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
-
-		kvm_max_guest_tsc_khz = max;
-
 		kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
@@ -972,31 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
-static u64 __scale_tsc(u64 ratio, u64 tsc)
-{
-	u64 mult, frac, _tsc;
-
-	mult  = ratio >> 32;
-	frac  = ratio & ((1ULL << 32) - 1);
-
-	_tsc  = tsc;
-	_tsc *= mult;
-	_tsc += (tsc >> 32) * frac;
-	_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
-
-	return _tsc;
-}
-
-static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
-{
-	u64 _tsc = tsc;
-
-	if (vcpu->arch.tsc_scaling_ratio != TSC_RATIO_DEFAULT)
-		_tsc = __scale_tsc(vcpu->arch.tsc_scaling_ratio, tsc);
-
-	return _tsc;
-}
-
 static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 {
 	u64 ratio;
@@ -1065,7 +1025,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
 	if (host) {
 		if (vcpu->arch.tsc_scaling_ratio != TSC_RATIO_DEFAULT)
 			WARN_ON(adjustment < 0);
-		adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+		adjustment = kvm_scale_tsc(vcpu, (u64)adjustment);
 	}
 
 	svm->vmcb->control.tsc_offset += adjustment;
@@ -1083,7 +1043,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 {
 	u64 tsc;
 
-	tsc = svm_scale_tsc(vcpu, rdtsc());
+	tsc = kvm_scale_tsc(vcpu, rdtsc());
 
 	return target_tsc - tsc;
 }
@@ -3075,7 +3035,7 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
 	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
 	return vmcb->control.tsc_offset +
-		svm_scale_tsc(vcpu, host_tsc);
+		kvm_scale_tsc(vcpu, host_tsc);
 }
 
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3085,7 +3045,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	switch (msr_info->index) {
 	case MSR_IA32_TSC: {
 		msr_info->data = svm->vmcb->control.tsc_offset +
-			svm_scale_tsc(vcpu, rdtsc());
+			kvm_scale_tsc(vcpu, rdtsc());
 
 		break;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef5b9d66cd71..1473e64cb744 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1329,6 +1329,33 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
 	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
 }
 
+/*
+ * Multiply tsc by a fixed point number represented by ratio.
+ *
+ * The most significant 64-N bits (mult) of ratio represent the
+ * integral part of the fixed point number; the remaining N bits
+ * (frac) represent the fractional part, ie. ratio represents a fixed
+ * point number (mult + frac * 2^(-N)).
+ *
+ * N equals to kvm_tsc_scaling_ratio_frac_bits.
+ */
+static inline u64 __scale_tsc(u64 ratio, u64 tsc)
+{
+	return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
+}
+
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
+{
+	u64 _tsc = tsc;
+	u64 ratio = vcpu->arch.tsc_scaling_ratio;
+
+	if (ratio != kvm_default_tsc_scaling_ratio)
+		_tsc = __scale_tsc(ratio, tsc);
+
+	return _tsc;
+}
+EXPORT_SYMBOL_GPL(kvm_scale_tsc);
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -7371,8 +7398,19 @@ int kvm_arch_hardware_setup(void)
 	if (r != 0)
 		return r;
 
-	if (kvm_has_tsc_control)
+	if (kvm_has_tsc_control) {
+		/*
+		 * Make sure the user can only configure tsc_khz values that
+		 * fit into a signed integer.
+		 * A min value is not calculated needed because it will always
+		 * be 1 on all machines.
+		 */
+		u64 max = min(0x7fffffffULL,
+			      __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
+		kvm_max_guest_tsc_khz = max;
+
 		kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
+	}
 
 	kvm_init_msr_list();
 	return 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 242a6d2b53ff..5706a2108f0a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
 int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 				  uint32_t guest_irq, bool set);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
+
 #endif
diff --git a/include/linux/math64.h b/include/linux/math64.h
index c45c089bfdac..44282ec7b682 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 }
 #endif /* mul_u64_u32_shr */
 
+#ifndef mul_u64_u64_shr
+static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
+{
+	return (u64)(((unsigned __int128)a * mul) >> shift);
+}
+#endif /* mul_u64_u64_shr */
+
 #else
 
 #ifndef mul_u64_u32_shr
@@ -161,6 +168,50 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 }
 #endif /* mul_u64_u32_shr */
 
+#ifndef mul_u64_u64_shr
+static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
+{
+	union {
+		u64 ll;
+		struct {
+#ifdef __BIG_ENDIAN
+			u32 high, low;
+#else
+			u32 low, high;
+#endif
+		} l;
+	} rl, rm, rn, rh, a0, b0;
+	u64 c;
+
+	a0.ll = a;
+	b0.ll = b;
+
+	rl.ll = (u64)a0.l.low * b0.l.low;
+	rm.ll = (u64)a0.l.low * b0.l.high;
+	rn.ll = (u64)a0.l.high * b0.l.low;
+	rh.ll = (u64)a0.l.high * b0.l.high;
+
+	/*
+	 * Each of these lines computes a 64-bit intermediate result into "c",
+	 * starting at bits 32-95.  The low 32-bits go into the result of the
+	 * multiplication, the high 32-bits are carried into the next step.
+	 */
+	rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
+	rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
+	rh.l.high = (c >> 32) + rh.l.high;
+
+	/*
+	 * The 128-bit result of the multiplication is in rl.ll and rh.ll,
+	 * shift it right and throw away the high part of the result.
+	 */
+	if (shift == 0)
+		return rl.ll;
+	if (shift < 64)
+		return (rl.ll >> shift) | (rh.ll << (64 - shift));
+	return rh.ll >> (shift & 63);
+}
+#endif /* mul_u64_u64_shr */
+
 #endif
 
 #endif /* _LINUX_MATH64_H */
-- 
cgit v1.2.3


From 381d585c80e34988269bd7901ad910981e900be1 Mon Sep 17 00:00:00 2001
From: Haozhong Zhang <haozhong.zhang@intel.com>
Date: Tue, 20 Oct 2015 15:39:04 +0800
Subject: KVM: x86: Replace call-back set_tsc_khz() with a common function

Both VMX and SVM propagate virtual_tsc_khz in the same way, so this
patch removes the call-back set_tsc_khz() and replaces it with a common
function.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/svm.c              | 36 --------------------------------
 arch/x86/kvm/vmx.c              | 17 ---------------
 arch/x86/kvm/x86.c              | 46 ++++++++++++++++++++++++++++++++++++-----
 include/linux/math64.h          | 29 ++++++++++++++++++++++++++
 5 files changed, 70 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52d1419968eb..c5a3f3d66e90 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -853,7 +853,6 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
-	void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
 	u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65f4f1947a62..f6e49a6c9ab0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -957,41 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
-	u64 ratio;
-	u64 khz;
-
-	/* Guest TSC same frequency as host TSC? */
-	if (!scale) {
-		vcpu->arch.tsc_scaling_ratio = TSC_RATIO_DEFAULT;
-		return;
-	}
-
-	/* TSC scaling supported? */
-	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
-		if (user_tsc_khz > tsc_khz) {
-			vcpu->arch.tsc_catchup = 1;
-			vcpu->arch.tsc_always_catchup = 1;
-		} else
-			WARN(1, "user requested TSC rate below hardware speed\n");
-		return;
-	}
-
-	khz = user_tsc_khz;
-
-	/* TSC scaling required  - calculate ratio */
-	ratio = khz << 32;
-	do_div(ratio, tsc_khz);
-
-	if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
-		WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
-				user_tsc_khz);
-		return;
-	}
-	vcpu->arch.tsc_scaling_ratio = ratio;
-}
-
 static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4402,7 +4367,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
-	.set_tsc_khz = svm_set_tsc_khz,
 	.read_tsc_offset = svm_read_tsc_offset,
 	.write_tsc_offset = svm_write_tsc_offset,
 	.adjust_tsc_offset = svm_adjust_tsc_offset,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a26ed285931b..baee46893899 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2382,22 +2382,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 	return host_tsc + tsc_offset;
 }
 
-/*
- * Engage any workarounds for mis-matched TSC rates.  Currently limited to
- * software catchup for faster rates on slower CPUs.
- */
-static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
-{
-	if (!scale)
-		return;
-
-	if (user_tsc_khz > tsc_khz) {
-		vcpu->arch.tsc_catchup = 1;
-		vcpu->arch.tsc_always_catchup = 1;
-	} else
-		WARN(1, "user requested TSC rate below hardware speed\n");
-}
-
 static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
 {
 	return vmcs_read64(TSC_OFFSET);
@@ -10826,7 +10810,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
-	.set_tsc_khz = vmx_set_tsc_khz,
 	.read_tsc_offset = vmx_read_tsc_offset,
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset = vmx_adjust_tsc_offset,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1473e64cb744..c314e8d22a67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1253,7 +1253,43 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm)
 	return v;
 }
 
-static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
+static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
+{
+	u64 ratio;
+
+	/* Guest TSC same frequency as host TSC? */
+	if (!scale) {
+		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
+		return 0;
+	}
+
+	/* TSC scaling supported? */
+	if (!kvm_has_tsc_control) {
+		if (user_tsc_khz > tsc_khz) {
+			vcpu->arch.tsc_catchup = 1;
+			vcpu->arch.tsc_always_catchup = 1;
+			return 0;
+		} else {
+			WARN(1, "user requested TSC rate below hardware speed\n");
+			return -1;
+		}
+	}
+
+	/* TSC scaling required  - calculate ratio */
+	ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
+				user_tsc_khz, tsc_khz);
+
+	if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
+		WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+			  user_tsc_khz);
+		return -1;
+	}
+
+	vcpu->arch.tsc_scaling_ratio = ratio;
+	return 0;
+}
+
+static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 {
 	u32 thresh_lo, thresh_hi;
 	int use_scaling = 0;
@@ -1262,7 +1298,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 	if (this_tsc_khz == 0) {
 		/* set tsc_scaling_ratio to a safe value */
 		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
-		return;
+		return -1;
 	}
 
 	/* Compute a scale to convert nanoseconds in TSC cycles */
@@ -1283,7 +1319,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
 		use_scaling = 1;
 	}
-	kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
+	return set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
 }
 
 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
@@ -3353,9 +3389,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (user_tsc_khz == 0)
 			user_tsc_khz = tsc_khz;
 
-		kvm_set_tsc_khz(vcpu, user_tsc_khz);
+		if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
+			r = 0;
 
-		r = 0;
 		goto out;
 	}
 	case KVM_GET_TSC_KHZ: {
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 44282ec7b682..6e8b5b270ffe 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -214,4 +214,33 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
 
 #endif
 
+#ifndef mul_u64_u32_div
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
+{
+	union {
+		u64 ll;
+		struct {
+#ifdef __BIG_ENDIAN
+			u32 high, low;
+#else
+			u32 low, high;
+#endif
+		} l;
+	} u, rl, rh;
+
+	u.ll = a;
+	rl.ll = (u64)u.l.low * mul;
+	rh.ll = (u64)u.l.high * mul + rl.l.high;
+
+	/* Bits 32-63 of the result will be in rh.l.low. */
+	rl.l.high = do_div(rh.ll, divisor);
+
+	/* Bits 0-31 of the result will be in rl.l.low.	*/
+	do_div(rl.ll, divisor);
+
+	rl.l.high = rh.l.low;
+	return rl.ll;
+}
+#endif /* mul_u64_u32_div */
+
 #endif /* _LINUX_MATH64_H */
-- 
cgit v1.2.3


From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 28 Oct 2015 02:39:55 +0100
Subject: context_tracking: remove duplicate enabled check

All calls to context_tracking_enter and context_tracking_exit
are already checking context_tracking_is_enabled, except the
context_tracking_user_enter and context_tracking_user_exit
functions left in for the benefit of assembly calls.

Pull the check up to those functions, by making them simple
wrappers around the user_enter and user_exit inline functions.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Rik van Riel <riel@redhat.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/context_tracking.h |  4 ++--
 kernel/context_tracking.c        | 16 ++--------------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 008fc67d0d96..6ef136ff0897 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void);
 static inline void user_enter(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_user_enter();
+		context_tracking_enter(CONTEXT_USER);
 
 }
 static inline void user_exit(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_user_exit();
+		context_tracking_exit(CONTEXT_USER);
 }
 
 static inline enum ctx_state exception_enter(void)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 0a495ab35bc7..6d4c6ce21275 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -62,15 +62,6 @@ void context_tracking_enter(enum ctx_state state)
 {
 	unsigned long flags;
 
-	/*
-	 * Repeat the user_enter() check here because some archs may be calling
-	 * this from asm and if no CPU needs context tracking, they shouldn't
-	 * go further. Repeat the check here until they support the inline static
-	 * key check.
-	 */
-	if (!context_tracking_is_enabled())
-		return;
-
 	/*
 	 * Some contexts may involve an exception occuring in an irq,
 	 * leading to that nesting:
@@ -128,7 +119,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter);
 
 void context_tracking_user_enter(void)
 {
-	context_tracking_enter(CONTEXT_USER);
+	user_enter();
 }
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
@@ -148,9 +139,6 @@ void context_tracking_exit(enum ctx_state state)
 {
 	unsigned long flags;
 
-	if (!context_tracking_is_enabled())
-		return;
-
 	if (in_interrupt())
 		return;
 
@@ -181,7 +169,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit);
 
 void context_tracking_user_exit(void)
 {
-	context_tracking_exit(CONTEXT_USER);
+	user_exit();
 }
 NOKPROBE_SYMBOL(context_tracking_user_exit);
 
-- 
cgit v1.2.3


From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 28 Oct 2015 02:39:56 +0100
Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit

guest_enter and guest_exit must be called with interrupts disabled,
since they take the vtime_seqlock with write_seq{lock,unlock}.
Therefore, it is not necessary to check for exceptions, nor to
save/restore the IRQ state, when context tracking functions are
called by guest_enter and guest_exit.

Split the body of context_tracking_enter and context_tracking_exit
out to __-prefixed functions, and use them from KVM.

Rik van Riel has measured this to speed up a tight vmentry/vmexit
loop by about 2%.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/context_tracking.h |  8 +++--
 kernel/context_tracking.c        | 64 ++++++++++++++++++++++++----------------
 2 files changed, 44 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 6ef136ff0897..68b575afe5f5 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,6 +10,10 @@
 #ifdef CONFIG_CONTEXT_TRACKING
 extern void context_tracking_cpu_set(int cpu);
 
+/* Called with interrupts disabled.  */
+extern void __context_tracking_enter(enum ctx_state state);
+extern void __context_tracking_exit(enum ctx_state state);
+
 extern void context_tracking_enter(enum ctx_state state);
 extern void context_tracking_exit(enum ctx_state state);
 extern void context_tracking_user_enter(void);
@@ -88,13 +92,13 @@ static inline void guest_enter(void)
 		current->flags |= PF_VCPU;
 
 	if (context_tracking_is_enabled())
-		context_tracking_enter(CONTEXT_GUEST);
+		__context_tracking_enter(CONTEXT_GUEST);
 }
 
 static inline void guest_exit(void)
 {
 	if (context_tracking_is_enabled())
-		context_tracking_exit(CONTEXT_GUEST);
+		__context_tracking_exit(CONTEXT_GUEST);
 
 	if (vtime_accounting_enabled())
 		vtime_guest_exit(current);
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6d4c6ce21275..d8560ee3bab7 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -58,27 +58,13 @@ static void context_tracking_recursion_exit(void)
  * instructions to execute won't use any RCU read side critical section
  * because this function sets RCU in extended quiescent state.
  */
-void context_tracking_enter(enum ctx_state state)
+void __context_tracking_enter(enum ctx_state state)
 {
-	unsigned long flags;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
 	/* Kernel threads aren't supposed to go to userspace */
 	WARN_ON_ONCE(!current->mm);
 
-	local_irq_save(flags);
 	if (!context_tracking_recursion_enter())
-		goto out_irq_restore;
+		return;
 
 	if ( __this_cpu_read(context_tracking.state) != state) {
 		if (__this_cpu_read(context_tracking.active)) {
@@ -111,7 +97,27 @@ void context_tracking_enter(enum ctx_state state)
 		__this_cpu_write(context_tracking.state, state);
 	}
 	context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_enter);
+EXPORT_SYMBOL_GPL(__context_tracking_enter);
+
+void context_tracking_enter(enum ctx_state state)
+{
+	unsigned long flags;
+
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	__context_tracking_enter(state);
 	local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_enter);
@@ -135,16 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter);
  * This call supports re-entrancy. This way it can be called from any exception
  * handler without needing to know if we came from userspace or not.
  */
-void context_tracking_exit(enum ctx_state state)
+void __context_tracking_exit(enum ctx_state state)
 {
-	unsigned long flags;
-
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
 	if (!context_tracking_recursion_enter())
-		goto out_irq_restore;
+		return;
 
 	if (__this_cpu_read(context_tracking.state) == state) {
 		if (__this_cpu_read(context_tracking.active)) {
@@ -161,7 +161,19 @@ void context_tracking_exit(enum ctx_state state)
 		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
 	}
 	context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_exit);
+EXPORT_SYMBOL_GPL(__context_tracking_exit);
+
+void context_tracking_exit(enum ctx_state state)
+{
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	__context_tracking_exit(state);
 	local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_exit);
-- 
cgit v1.2.3


From d1cd21427747f15920cd726f5f67a07880e7dee4 Mon Sep 17 00:00:00 2001
From: Jonathan Richardson <jonathar@broadcom.com>
Date: Fri, 16 Oct 2015 17:40:58 -0700
Subject: pwm: Set enable state properly on failed call to enable

The pwm_enable() function didn't clear the enabled bit if a call to the
driver's ->enable() callback returned an error. The result was that the
state of the PWM core was wrong. Clearing the bit when enable returns
an error ensures the state is properly set.

Tested-by: Jonathan Richardson <jonathar@broadcom.com>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jonathan Richardson <jonathar@broadcom.com>
[thierry.reding@gmail.com: add missing kerneldoc for the lock]
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c  | 33 ++++++++++++++++++++++++++-------
 include/linux/pwm.h |  3 +++
 2 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 3f9df3ea3350..b8f6c309c160 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -269,6 +269,7 @@ int pwmchip_add_with_polarity(struct pwm_chip *chip,
 		pwm->pwm = chip->base + i;
 		pwm->hwpwm = i;
 		pwm->polarity = polarity;
+		mutex_init(&pwm->lock);
 
 		radix_tree_insert(&pwm_tree, pwm->pwm, pwm);
 	}
@@ -473,16 +474,22 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity)
 	if (!pwm->chip->ops->set_polarity)
 		return -ENOSYS;
 
-	if (pwm_is_enabled(pwm))
-		return -EBUSY;
+	mutex_lock(&pwm->lock);
+
+	if (pwm_is_enabled(pwm)) {
+		err = -EBUSY;
+		goto unlock;
+	}
 
 	err = pwm->chip->ops->set_polarity(pwm->chip, pwm, polarity);
 	if (err)
-		return err;
+		goto unlock;
 
 	pwm->polarity = polarity;
 
-	return 0;
+unlock:
+	mutex_unlock(&pwm->lock);
+	return err;
 }
 EXPORT_SYMBOL_GPL(pwm_set_polarity);
 
@@ -494,10 +501,22 @@ EXPORT_SYMBOL_GPL(pwm_set_polarity);
  */
 int pwm_enable(struct pwm_device *pwm)
 {
-	if (pwm && !test_and_set_bit(PWMF_ENABLED, &pwm->flags))
-		return pwm->chip->ops->enable(pwm->chip, pwm);
+	int err = 0;
+
+	if (!pwm)
+		return -EINVAL;
+
+	mutex_lock(&pwm->lock);
+
+	if (!test_and_set_bit(PWMF_ENABLED, &pwm->flags)) {
+		err = pwm->chip->ops->enable(pwm->chip, pwm);
+		if (err)
+			clear_bit(PWMF_ENABLED, &pwm->flags);
+	}
+
+	mutex_unlock(&pwm->lock);
 
-	return pwm ? 0 : -EINVAL;
+	return err;
 }
 EXPORT_SYMBOL_GPL(pwm_enable);
 
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index d681f6875aef..cfc3ed46cad2 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -2,6 +2,7 @@
 #define __LINUX_PWM_H
 
 #include <linux/err.h>
+#include <linux/mutex.h>
 #include <linux/of.h>
 
 struct pwm_device;
@@ -87,6 +88,7 @@ enum {
  * @pwm: global index of the PWM device
  * @chip: PWM chip providing this PWM device
  * @chip_data: chip-private data associated with the PWM device
+ * @lock: used to serialize accesses to the PWM device where necessary
  * @period: period of the PWM signal (in nanoseconds)
  * @duty_cycle: duty cycle of the PWM signal (in nanoseconds)
  * @polarity: polarity of the PWM signal
@@ -98,6 +100,7 @@ struct pwm_device {
 	unsigned int pwm;
 	struct pwm_chip *chip;
 	void *chip_data;
+	struct mutex lock;
 
 	unsigned int period;
 	unsigned int duty_cycle;
-- 
cgit v1.2.3


From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 Nov 2015 14:31:18 +0100
Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags

nf_tables may create percpu counters from the packet path through its
dynamic set instantiation infrastructure, so we need a way to allocate
this through GFP_ATOMIC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c00772bd136..e9d0c8a75380 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats {
 	struct u64_stats_sync   syncp;
 };
 
-#define netdev_alloc_pcpu_stats(type)				\
-({								\
-	typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
-	if (pcpu_stats)	{					\
-		int __cpu;					\
-		for_each_possible_cpu(__cpu) {			\
-			typeof(type) *stat;			\
-			stat = per_cpu_ptr(pcpu_stats, __cpu);	\
-			u64_stats_init(&stat->syncp);		\
-		}						\
-	}							\
-	pcpu_stats;						\
+#define __netdev_alloc_pcpu_stats(type, gfp)				\
+({									\
+	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
+	if (pcpu_stats)	{						\
+		int __cpu;						\
+		for_each_possible_cpu(__cpu) {				\
+			typeof(type) *stat;				\
+			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
+			u64_stats_init(&stat->syncp);			\
+		}							\
+	}								\
+	pcpu_stats;							\
 })
 
+#define netdev_alloc_pcpu_stats(type)					\
+	__netdev_alloc_pcpu_stats(type, GFP_KERNEL)
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink
-- 
cgit v1.2.3


From b1d06b60e90cd5016798b9984f8e420e753f4846 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 6 Nov 2015 19:28:22 -0800
Subject: of: Provide static inline function for of_translate_address if needed

If OF_ADDRESS is not configured, builds can fail with errors such as

drivers/net/ethernet/hisilicon/hns_mdio.c:
	In function 'hns_mdio_bus_name':
drivers/net/ethernet/hisilicon/hns_mdio.c:411:3:
	error: implicit declaration of function 'of_translate_address'

as currently seen when building sparc:allmodconfig.

Introduce a static inline function if OF_ADDRESS is not configured to fix
the build failure. Return OF_BAD_ADDR in this case. For this to work, the
definition of OF_BAD_ADDR has to be moved outside CONFIG_OF conditional
code.

Fixes: 876133d3161d ("net: hisilicon: add OF dependency")
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Frank Rowand <frank.rowand@sonymobile.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h         | 4 ++--
 include/linux/of_address.h | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 2194b8ca41f9..dd10626a615f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -126,6 +126,8 @@ extern raw_spinlock_t devtree_lock;
 #define OF_POPULATED	3 /* device already created for the node */
 #define OF_POPULATED_BUS	4 /* of_platform_populate recursed to children of this node */
 
+#define OF_BAD_ADDR	((u64)-1)
+
 #ifdef CONFIG_OF
 void of_core_init(void);
 
@@ -229,8 +231,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
 
-#define OF_BAD_ADDR	((u64)-1)
-
 static inline const char *of_node_full_name(const struct device_node *np)
 {
 	return np ? np->full_name : "<no-node>";
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index d88e81be6368..507daad0bc8d 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -57,6 +57,13 @@ extern int of_dma_get_range(struct device_node *np, u64 *dma_addr,
 				u64 *paddr, u64 *size);
 extern bool of_dma_is_coherent(struct device_node *np);
 #else /* CONFIG_OF_ADDRESS */
+
+static inline u64 of_translate_address(struct device_node *np,
+				       const __be32 *addr)
+{
+	return OF_BAD_ADDR;
+}
+
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
 					const struct of_device_id *matches,
-- 
cgit v1.2.3


From 5c50002963369c7c622b18ff751719eadbe225c5 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 13 Oct 2015 16:51:02 -0600
Subject: vfs: remove unused wrapper block_page_mkwrite()

The function currently called "__block_page_mkwrite()" used to be called
"block_page_mkwrite()" until a wrapper for this function was added by:

commit 24da4fab5a61 ("vfs: Create __block_page_mkwrite() helper passing
	error values back")

This wrapper, the current "block_page_mkwrite()", is currently unused.
__block_page_mkwrite() is used directly by ext4, nilfs2 and xfs.

Remove the unused wrapper, rename __block_page_mkwrite() back to
block_page_mkwrite() and update the comment above block_page_mkwrite().

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.com>
Cc: Jan Kara <jack@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c                 | 24 ++----------------------
 fs/ext4/inode.c             |  4 ++--
 fs/nilfs2/file.c            |  2 +-
 fs/xfs/xfs_file.c           |  2 +-
 include/linux/buffer_head.h |  2 --
 5 files changed, 6 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 51aff0296ce2..4f4cd959da7c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write);
  * unlock the page.
  *
  * Direct callers of this function should protect against filesystem freezing
- * using sb_start_write() - sb_end_write() functions.
+ * using sb_start_pagefault() - sb_end_pagefault() functions.
  */
-int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 			 get_block_t get_block)
 {
 	struct page *page = vmf->page;
@@ -2459,26 +2459,6 @@ out_unlock:
 	unlock_page(page);
 	return ret;
 }
-EXPORT_SYMBOL(__block_page_mkwrite);
-
-int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		   get_block_t get_block)
-{
-	int ret;
-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
-
-	sb_start_pagefault(sb);
-
-	/*
-	 * Update file times before taking page lock. We may end up failing the
-	 * fault so this update may be superfluous but who really cares...
-	 */
-	file_update_time(vma->vm_file);
-
-	ret = __block_page_mkwrite(vma, vmf, get_block);
-	sb_end_pagefault(sb);
-	return block_page_mkwrite_return(ret);
-}
 EXPORT_SYMBOL(block_page_mkwrite);
 
 /*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7d1aad1d9313..ea433a7f4bca 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	    !ext4_should_journal_data(inode) &&
 	    !ext4_nonda_switch(inode->i_sb)) {
 		do {
-			ret = __block_page_mkwrite(vma, vmf,
+			ret = block_page_mkwrite(vma, vmf,
 						   ext4_da_get_block_prep);
 		} while (ret == -ENOSPC &&
 		       ext4_should_retry_alloc(inode->i_sb, &retries));
@@ -5330,7 +5330,7 @@ retry_alloc:
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
-	ret = __block_page_mkwrite(vma, vmf, get_block);
+	ret = block_page_mkwrite(vma, vmf, get_block);
 	if (!ret && ext4_should_journal_data(inode)) {
 		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
 			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 54575e3cc1a2..088ba001c6ef 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		goto out;
 
 	file_update_time(vma->vm_file);
-	ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
+	ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
 	if (ret) {
 		nilfs_transaction_abort(inode->i_sb);
 		goto out;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e78feb400e22..f80e90f95ad8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1506,7 +1506,7 @@ xfs_filemap_page_mkwrite(
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
 				    xfs_end_io_dax_write);
 	} else {
-		ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
 		ret = block_page_mkwrite_return(ret);
 	}
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e6797ded700e..89d9aa9e79bf 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -227,8 +227,6 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
 			get_block_t *, loff_t *);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
-int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-				get_block_t get_block);
 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 				get_block_t get_block);
 /* Convert errno to return value from ->page_mkwrite() call */
-- 
cgit v1.2.3


From c8fffa643583e00eb9a783abbca251b11bc0d163 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Thu, 8 Oct 2015 17:07:20 -0600
Subject: vfs: remove stale comment in inode_operations

The big warning comment that is currently at the end of struct
inode_operations was added as part of this commit:

4aa7c6346be3 ("vfs: add i_op->dentry_open()")

It was added to warn people not to use the newly added 'dentry_open'
function pointer.

This function pointer was removed as part of this commit:

4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and
		f_inode to the underlay")

The comment was left behind and now refers to nothing, so remove it.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a1cb8c605e0..f3bfbd7d3fa9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1665,8 +1665,6 @@ struct inode_operations {
 			   umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
-
-	/* WARNING: probably going away soon, do not use! */
 } ____cacheline_aligned;
 
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-- 
cgit v1.2.3


From c0a9f72c156baf1e88c33c6ba4450647af1b8804 Mon Sep 17 00:00:00 2001
From: Alex Smith <alex.smith@imgtec.com>
Date: Mon, 12 Oct 2015 10:40:43 +0100
Subject: irqchip: irq-mips-gic: Provide function to map GIC user section

The GIC provides a "user-mode visible" section containing a mirror of
the counter registers which can be mapped into user memory. This will
be used by the VDSO time function implementations, so provide a
function to map it in.

When the GIC is not enabled in Kconfig a dummy inline version of this
function is provided, along with "#define gic_present 0", so that we
don't have to litter the VDSO code with ifdefs.

[markos.chandras@imgtec.com:
  - Move mapping code to arch/mips/kernel/vdso.c and use a resource
    type to get the GIC usermode information
  - Avoid renaming function arguments and use __gic_base_addr to hold
    the base GIC address prior to ioremap.]
[ralf@linux-mips.org: Fix up gic_get_usm_range() to compile and make inline
again.]

Signed-off-by: Alex Smith <alex.smith@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alex Smith <alex.smith@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/11281/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/irqchip/irq-mips-gic.c   | 14 ++++++++++++++
 include/linux/irqchip/mips-gic.h | 17 +++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index aeaa061f0dbf..9e17ef27a183 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -29,6 +29,7 @@ struct gic_pcpu_mask {
 	DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS);
 };
 
+static unsigned long __gic_base_addr;
 static void __iomem *gic_base;
 static struct gic_pcpu_mask pcpu_masks[NR_CPUS];
 static DEFINE_SPINLOCK(gic_lock);
@@ -301,6 +302,17 @@ int gic_get_c0_fdc_int(void)
 				  GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC));
 }
 
+int gic_get_usm_range(struct resource *gic_usm_res)
+{
+	if (!gic_present)
+		return -1;
+
+	gic_usm_res->start = __gic_base_addr + USM_VISIBLE_SECTION_OFS;
+	gic_usm_res->end = gic_usm_res->start + (USM_VISIBLE_SECTION_SIZE - 1);
+
+	return 0;
+}
+
 static void gic_handle_shared_int(bool chained)
 {
 	unsigned int i, intr, virq, gic_reg_step = mips_cm_is64 ? 8 : 4;
@@ -798,6 +810,8 @@ static void __init __gic_init(unsigned long gic_base_addr,
 {
 	unsigned int gicconfig;
 
+	__gic_base_addr = gic_base_addr;
+
 	gic_base = ioremap_nocache(gic_base_addr, gic_addrspace_size);
 
 	gicconfig = gic_read(GIC_REG(SHARED, GIC_SH_CONFIG));
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index 4e6861605050..ce824db48d64 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -9,6 +9,7 @@
 #define __LINUX_IRQCHIP_MIPS_GIC_H
 
 #include <linux/clocksource.h>
+#include <linux/ioport.h>
 
 #define GIC_MAX_INTRS			256
 
@@ -245,6 +246,8 @@
 #define GIC_SHARED_TO_HWIRQ(x)	(GIC_SHARED_HWIRQ_BASE + (x))
 #define GIC_HWIRQ_TO_SHARED(x)	((x) - GIC_SHARED_HWIRQ_BASE)
 
+#ifdef CONFIG_MIPS_GIC
+
 extern unsigned int gic_present;
 
 extern void gic_init(unsigned long gic_base_addr,
@@ -264,4 +267,18 @@ extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
 extern int gic_get_c0_compare_int(void);
 extern int gic_get_c0_perfcount_int(void);
 extern int gic_get_c0_fdc_int(void);
+extern int gic_get_usm_range(struct resource *gic_usm_res);
+
+#else /* CONFIG_MIPS_GIC */
+
+#define gic_present	0
+
+static inline int gic_get_usm_range(struct resource *gic_usm_res)
+{
+	/* Shouldn't be called. */
+	return -1;
+}
+
+#endif /* CONFIG_MIPS_GIC */
+
 #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */
-- 
cgit v1.2.3


From e3a7a3bf362e2a8acc301e5eaec2631e740a8a95 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 11 Nov 2015 09:37:34 -0700
Subject: block: don't hardcode blk_qc_t -> tag mask

Use the shift/mask we use elsewhere.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 641e5a3ed58c..0fb65843ec1e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -265,7 +265,7 @@ static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
 
 static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 {
-	return cookie & 0xffff;
+	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 }
 
 #endif /* __LINUX_BLK_TYPES_H */
-- 
cgit v1.2.3


From e409de992e3ea3674393465f07cc71c948edd87a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Oct 2015 19:18:52 +0200
Subject: 9p: xattr simplifications

Now that the xattr handler is passed to the xattr handler operations, we
can use the same get and set operations for the user, trusted, and security
xattr namespaces.  In those namespaces, we can access the full attribute
name by "reattaching" the name prefix the vfs has skipped for us.  Add a
xattr_full_name helper to make this obvious in the code.

For the "system.posix_acl_access" and "system.posix_acl_default"
attributes, handler->prefix is the full attribute name; the suffix is the
empty string.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: v9fs-developer@lists.sourceforge.net
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/Makefile         |  5 +--
 fs/9p/acl.c            | 51 +++----------------------------
 fs/9p/xattr.c          | 42 ++++++++++++++++++++++++++
 fs/9p/xattr.h          |  3 --
 fs/9p/xattr_security.c | 82 --------------------------------------------------
 fs/9p/xattr_trusted.c  | 82 --------------------------------------------------
 fs/9p/xattr_user.c     | 82 --------------------------------------------------
 fs/xattr.c             | 24 +++++++++++++++
 include/linux/xattr.h  | 18 ++++++-----
 9 files changed, 83 insertions(+), 306 deletions(-)
 delete mode 100644 fs/9p/xattr_security.c
 delete mode 100644 fs/9p/xattr_trusted.c
 delete mode 100644 fs/9p/xattr_user.c

(limited to 'include/linux')

diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index ff7be98f84f2..9619ccadd2fc 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -10,10 +10,7 @@ obj-$(CONFIG_9P_FS) := 9p.o
 	vfs_dentry.o \
 	v9fs.o \
 	fid.o  \
-	xattr.o \
-	xattr_user.o \
-	xattr_trusted.o
+	xattr.o
 
 9p-$(CONFIG_9P_FSCACHE) += cache.o
 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
-9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index e6fe82462043..a7e28890f5ef 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -212,31 +212,12 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 	return 0;
 }
 
-static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
-			       void *buffer, size_t size, int type)
-{
-	char *full_name;
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		full_name =  POSIX_ACL_XATTR_ACCESS;
-		break;
-	case ACL_TYPE_DEFAULT:
-		full_name = POSIX_ACL_XATTR_DEFAULT;
-		break;
-	default:
-		BUG();
-	}
-	return v9fs_xattr_get(dentry, full_name, buffer, size);
-}
-
 static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
 			      struct dentry *dentry, const char *name,
 			      void *buffer, size_t size)
 {
 	struct v9fs_session_info *v9ses;
 	struct posix_acl *acl;
-	int type = handler->flags;
 	int error;
 
 	if (strcmp(name, "") != 0)
@@ -247,9 +228,9 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
 	 * We allow set/get/list of acl when access=client is not specified
 	 */
 	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
-		return v9fs_remote_get_acl(dentry, name, buffer, size, type);
+		return v9fs_xattr_get(dentry, handler->prefix, buffer, size);
 
-	acl = v9fs_get_cached_acl(d_inode(dentry), type);
+	acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -260,26 +241,6 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
 	return error;
 }
 
-static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
-			      const void *value, size_t size,
-			      int flags, int type)
-{
-	char *full_name;
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		full_name =  POSIX_ACL_XATTR_ACCESS;
-		break;
-	case ACL_TYPE_DEFAULT:
-		full_name = POSIX_ACL_XATTR_DEFAULT;
-		break;
-	default:
-		BUG();
-	}
-	return v9fs_xattr_set(dentry, full_name, value, size, flags);
-}
-
-
 static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 			      struct dentry *dentry, const char *name,
 			      const void *value, size_t size, int flags)
@@ -298,8 +259,8 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 	 * xattr value. We leave it to the server to validate
 	 */
 	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
-		return v9fs_remote_set_acl(dentry, name,
-					   value, size, flags, handler->flags);
+		return v9fs_xattr_set(dentry, handler->prefix, value, size,
+				      flags);
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
@@ -320,7 +281,6 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 
 	switch (handler->flags) {
 	case ACL_TYPE_ACCESS:
-		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
 			umode_t mode = inode->i_mode;
 			retval = posix_acl_equiv_mode(acl, &mode);
@@ -351,7 +311,6 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 		}
 		break;
 	case ACL_TYPE_DEFAULT:
-		name = POSIX_ACL_XATTR_DEFAULT;
 		if (!S_ISDIR(inode->i_mode)) {
 			retval = acl ? -EINVAL : 0;
 			goto err_out;
@@ -360,7 +319,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 	default:
 		BUG();
 	}
-	retval = v9fs_xattr_set(dentry, name, value, size, flags);
+	retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags);
 	if (!retval)
 		set_cached_acl(inode, handler->flags, acl);
 err_out:
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 0cf44b6cccd6..e3d026ac382e 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -137,6 +137,48 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 	return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
 }
 
+static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
+				  struct dentry *dentry, const char *name,
+				  void *buffer, size_t size)
+{
+	const char *full_name = xattr_full_name(handler, name);
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return v9fs_xattr_get(dentry, full_name, buffer, size);
+}
+
+static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
+				  struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	const char *full_name = xattr_full_name(handler, name);
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return v9fs_xattr_set(dentry, full_name, value, size, flags);
+}
+
+static struct xattr_handler v9fs_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.get	= v9fs_xattr_handler_get,
+	.set	= v9fs_xattr_handler_set,
+};
+
+static struct xattr_handler v9fs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.get	= v9fs_xattr_handler_get,
+	.set	= v9fs_xattr_handler_set,
+};
+
+#ifdef CONFIG_9P_FS_SECURITY
+static struct xattr_handler v9fs_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.get	= v9fs_xattr_handler_get,
+	.set	= v9fs_xattr_handler_set,
+};
+#endif
+
 const struct xattr_handler *v9fs_xattr_handlers[] = {
 	&v9fs_xattr_user_handler,
 	&v9fs_xattr_trusted_handler,
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index d3e2ea3840be..c63c3bea5de5 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -19,9 +19,6 @@
 #include <net/9p/client.h>
 
 extern const struct xattr_handler *v9fs_xattr_handlers[];
-extern struct xattr_handler v9fs_xattr_user_handler;
-extern struct xattr_handler v9fs_xattr_trusted_handler;
-extern struct xattr_handler v9fs_xattr_security_handler;
 extern const struct xattr_handler v9fs_xattr_acl_access_handler;
 extern const struct xattr_handler v9fs_xattr_acl_default_handler;
 
diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c
deleted file mode 100644
index c0a470add13c..000000000000
--- a/fs/9p/xattr_security.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright IBM Corporation, 2010
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include "xattr.h"
-
-static int v9fs_xattr_security_get(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			void *buffer, size_t size)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
-	memcpy(full_name+prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
-	kfree(full_name);
-	return retval;
-}
-
-static int v9fs_xattr_security_set(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			const void *value, size_t size, int flags)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
-	memcpy(full_name + prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
-	kfree(full_name);
-	return retval;
-}
-
-struct xattr_handler v9fs_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.get	= v9fs_xattr_security_get,
-	.set	= v9fs_xattr_security_set,
-};
diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c
deleted file mode 100644
index b888a4eecd1a..000000000000
--- a/fs/9p/xattr_trusted.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright IBM Corporation, 2010
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include "xattr.h"
-
-static int v9fs_xattr_trusted_get(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			void *buffer, size_t size)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
-	memcpy(full_name+prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
-	kfree(full_name);
-	return retval;
-}
-
-static int v9fs_xattr_trusted_set(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			const void *value, size_t size, int flags)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
-	memcpy(full_name + prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
-	kfree(full_name);
-	return retval;
-}
-
-struct xattr_handler v9fs_xattr_trusted_handler = {
-	.prefix	= XATTR_TRUSTED_PREFIX,
-	.get	= v9fs_xattr_trusted_get,
-	.set	= v9fs_xattr_trusted_set,
-};
diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c
deleted file mode 100644
index 06f136cbe264..000000000000
--- a/fs/9p/xattr_user.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright IBM Corporation, 2010
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include "xattr.h"
-
-static int v9fs_xattr_user_get(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			void *buffer, size_t size)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_USER_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
-	memcpy(full_name+prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_get(dentry, full_name, buffer, size);
-	kfree(full_name);
-	return retval;
-}
-
-static int v9fs_xattr_user_set(const struct xattr_handler *handler,
-			struct dentry *dentry, const char *name,
-			const void *value, size_t size, int flags)
-{
-	int retval;
-	char *full_name;
-	size_t name_len;
-	size_t prefix_len = XATTR_USER_PREFIX_LEN;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	name_len = strlen(name);
-	full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
-	if (!full_name)
-		return -ENOMEM;
-	memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
-	memcpy(full_name + prefix_len, name, name_len);
-	full_name[prefix_len + name_len] = '\0';
-
-	retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
-	kfree(full_name);
-	return retval;
-}
-
-struct xattr_handler v9fs_xattr_user_handler = {
-	.prefix	= XATTR_USER_PREFIX,
-	.get	= v9fs_xattr_user_get,
-	.set	= v9fs_xattr_user_set,
-};
diff --git a/fs/xattr.c b/fs/xattr.c
index 44377b6f6001..9b932b95d74e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -790,6 +790,30 @@ EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
 
+/**
+ * xattr_full_name  -  Compute full attribute name from suffix
+ *
+ * @handler:	handler of the xattr_handler operation
+ * @name:	name passed to the xattr_handler operation
+ *
+ * The get and set xattr handler operations are called with the remainder of
+ * the attribute name after skipping the handler's prefix: for example, "foo"
+ * is passed to the get operation of a handler with prefix "user." to get
+ * attribute "user.foo".  The full name is still "there" in the name though.
+ *
+ * Note: the list xattr handler operation when called from the vfs is passed a
+ * NULL name; some file systems use this operation internally, with varying
+ * semantics.
+ */
+const char *xattr_full_name(const struct xattr_handler *handler,
+			    const char *name)
+{
+	size_t prefix_len = strlen(handler->prefix);
+
+	return name - prefix_len;
+}
+EXPORT_SYMBOL(xattr_full_name);
+
 /*
  * Allocate new xattr and copy in the value; but leave the name to callers.
  */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 91b0a68d38dc..89474b9d260c 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -21,15 +21,19 @@ struct dentry;
 
 struct xattr_handler {
 	const char *prefix;
-	int flags;	/* fs private flags passed back to the handlers */
-	size_t (*list)(struct dentry *dentry, char *list, size_t list_size,
-		       const char *name, size_t name_len, int handler_flags);
-	int (*get)(struct dentry *dentry, const char *name, void *buffer,
-		   size_t size, int handler_flags);
-	int (*set)(struct dentry *dentry, const char *name, const void *buffer,
-		   size_t size, int flags, int handler_flags);
+	int flags;      /* fs private flags */
+	size_t (*list)(const struct xattr_handler *, struct dentry *dentry,
+		       char *list, size_t list_size, const char *name,
+		       size_t name_len);
+	int (*get)(const struct xattr_handler *, struct dentry *dentry,
+		   const char *name, void *buffer, size_t size);
+	int (*set)(const struct xattr_handler *, struct dentry *dentry,
+		   const char *name, const void *buffer, size_t size,
+		   int flags);
 };
 
+const char *xattr_full_name(const struct xattr_handler *, const char *);
+
 struct xattr {
 	const char *name;
 	void *value;
-- 
cgit v1.2.3


From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:26 +0200
Subject: net/mlx5e: Added self loopback prevention

Prevent outgoing multicast frames from looping back to the RX queue.

This is done by introducing a new HW capability, self_lb_en_modifiable, which
indicates support for modifying the self_lb_en bit in the modify_tir command.

When this capability is set, we can prevent TIRs from sending loopback
multicast traffic back to their own RQs by "refreshing" the TIRs with the
modify_tir command every time new channels (SQs/RQs) are created at
device open.
This is needed since TIRs are static and only allocated once on driver
load, and the loopback decision is their responsibility.

Fixes issues of the kind:
"IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!"
The issue is seen because the IPv6 solicitation multicast messages are
looped back and the network stack thinks they are coming from another host.

Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 48 +++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                     | 24 +++++++-----
 2 files changed, 62 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5fc4d2d78cdf..df001754bcd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
 	return err;
 }
 
+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+						  u32 tirn)
+{
+	void *in;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+	err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_NUM_TT; i++) {
+		err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+							     priv->tirn[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1376,6 +1412,13 @@ int mlx5e_open_locked(struct net_device *netdev)
 		goto err_clear_state_opened_flag;
 	}
 
+	err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+	if (err) {
+		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+			   __func__, err);
+		goto err_close_channels;
+	}
+
 	mlx5e_update_carrier(priv);
 	mlx5e_redirect_rqts(priv);
 
@@ -1383,6 +1426,8 @@ int mlx5e_open_locked(struct net_device *netdev)
 
 	return 0;
 
+err_close_channels:
+	mlx5e_close_channels(priv);
 err_clear_state_opened_flag:
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 	return err;
@@ -1909,6 +1954,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 			       "Not creating net device, some required device capabilities are missing\n");
 		return -ENOTSUPP;
 	}
+	if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+		mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
 	return 0;
 }
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dd2097455a2e..1565324eb620 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x6];
+	u8         reserved_0[0x3];
+	u8         self_lb_en_modifiable[0x1];
+	u8         reserved_1[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_1[0x4];
+	u8         reserved_2[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_2[0x3];
+	u8         reserved_3[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_4[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_4[0x20];
+	u8         reserved_5[0x20];
 
-	u8         reserved_5[0x10];
+	u8         reserved_6[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_6[0x120];
+	u8         reserved_7[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_7[0x600];
+	u8         reserved_8[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
@@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits {
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_1[0x1b];
+	u8         self_lb_en[0x1];
+	u8         reserved_2[0x3];
 	u8         lro[0x1];
 };
 
-- 
cgit v1.2.3


From 500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 3 Nov 2015 12:28:10 +0200
Subject: dmaengine: of_dma: Correct return code for
 of_dma_request_slave_channel in case !CONFIG_OF

of_dma_request_slave_channel() should return either a pointer to a valid
dma_chan or an ERR_PTR() error code; NULL is not expected to be returned.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/of_dma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 36112cdd665a..b90d8ec57c1f 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np,
 static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
-- 
cgit v1.2.3


From aedf17f4515b12ba1cd73298e66baa69cf93010e Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:36 +0100
Subject: lightnvm: change max_phys_sect to uint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The max_phys_sect variable is defined as an 8-bit type (uint8_t). We do a
boundary check to allow at most 256 physical page descriptors per command.
As we are not indexing from zero, this expression is always false. Bump
max_phys_sect to an unsigned int to support the range check.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 69c9057e1ab8..32b5369e814e 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -220,7 +220,7 @@ struct nvm_dev_ops {
 	nvm_dev_dma_alloc_fn	*dev_dma_alloc;
 	nvm_dev_dma_free_fn	*dev_dma_free;
 
-	uint8_t			max_phys_sect;
+	unsigned int		max_phys_sect;
 };
 
 struct nvm_lun {
-- 
cgit v1.2.3


From 11450469830f2481a9e7cb181609288d40f41323 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:37 +0100
Subject: lightnvm: update bad block table format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The specification was changed to reflect a multi-value bad block table.
Instead of a bit-based bad block table, the bad block table now allows
eight bad block categories. Currently four are defined:

 * Factory bad blocks
 * Grown bad blocks
 * Device-side reserved blocks
 * Host-side reserved blocks

The factory and grown bad blocks are the regular bad blocks. The
reserved blocks are either for internal use or external use. In
particular, the device-side reserved blocks allow the host to
bootstrap from a limited number of flash blocks, reducing the number of
flash blocks to scan upon super block initialization.

Support for both get bad block table and set bad block table is added.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c    |  32 ++++++++----
 drivers/lightnvm/gennvm.h    |   2 +
 drivers/nvme/host/lightnvm.c | 113 ++++++++++++++++++++++++++++++++++---------
 include/linux/lightnvm.h     |   6 +--
 4 files changed, 117 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index ae1fb2bdc5f4..8cfc0114ff13 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -64,19 +64,22 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
 	return 0;
 }
 
-static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
+static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
 								void *private)
 {
 	struct gen_nvm *gn = private;
-	struct gen_lun *lun = &gn->luns[lun_id];
+	struct nvm_dev *dev = gn->dev;
+	struct gen_lun *lun;
 	struct nvm_block *blk;
 	int i;
 
-	if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
-		return 0;
+	ppa = addr_to_generic_mode(gn->dev, ppa);
+	lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun];
+
+	for (i = 0; i < nr_blocks; i++) {
+		if (blks[i] == 0)
+			continue;
 
-	i = -1;
-	while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) {
 		blk = &lun->vlun.blocks[i];
 		if (!blk) {
 			pr_err("gennvm: BB data is out of bounds.\n");
@@ -171,8 +174,16 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 		}
 
 		if (dev->ops->get_bb_tbl) {
-			ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
-					dev->blks_per_lun, gennvm_block_bb, gn);
+			struct ppa_addr ppa;
+
+			ppa.ppa = 0;
+			ppa.g.ch = lun->vlun.chnl_id;
+			ppa.g.lun = lun->vlun.id;
+			ppa = generic_to_addr_mode(dev, ppa);
+
+			ret = dev->ops->get_bb_tbl(dev->q, ppa,
+						dev->blks_per_lun,
+						gennvm_block_bb, gn);
 			if (ret)
 				pr_err("gennvm: could not read BB table\n");
 		}
@@ -199,6 +210,7 @@ static int gennvm_register(struct nvm_dev *dev)
 	if (!gn)
 		return -ENOMEM;
 
+	gn->dev = dev;
 	gn->nr_luns = dev->nr_luns;
 	dev->mp = gn;
 
@@ -354,10 +366,10 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
 	int i;
 
-	if (!dev->ops->set_bb)
+	if (!dev->ops->set_bb_tbl)
 		return;
 
-	if (dev->ops->set_bb(dev->q, rqd, 1))
+	if (dev->ops->set_bb_tbl(dev->q, rqd, 1))
 		return;
 
 	gennvm_addr_to_generic_mode(dev, rqd);
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index d23bd3501ddc..9c24b5b32dac 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -35,6 +35,8 @@ struct gen_lun {
 };
 
 struct gen_nvm {
+	struct nvm_dev *dev;
+
 	int nr_luns;
 	struct gen_lun *luns;
 };
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index e0b7b95813bc..2c3546516300 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -93,7 +93,7 @@ struct nvme_nvm_l2ptbl {
 	__le16			cdw14[6];
 };
 
-struct nvme_nvm_bbtbl {
+struct nvme_nvm_getbbtbl {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
@@ -101,10 +101,23 @@ struct nvme_nvm_bbtbl {
 	__u64			rsvd[2];
 	__le64			prp1;
 	__le64			prp2;
-	__le32			prp1_len;
-	__le32			prp2_len;
-	__le32			lbb;
-	__u32			rsvd11[3];
+	__le64			spba;
+	__u32			rsvd4[4];
+};
+
+struct nvme_nvm_setbbtbl {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le64			spba;
+	__le16			nlb;
+	__u8			value;
+	__u8			rsvd3;
+	__u32			rsvd4[3];
 };
 
 struct nvme_nvm_erase_blk {
@@ -129,8 +142,8 @@ struct nvme_nvm_command {
 		struct nvme_nvm_hb_rw hb_rw;
 		struct nvme_nvm_ph_rw ph_rw;
 		struct nvme_nvm_l2ptbl l2p;
-		struct nvme_nvm_bbtbl get_bb;
-		struct nvme_nvm_bbtbl set_bb;
+		struct nvme_nvm_getbbtbl get_bb;
+		struct nvme_nvm_setbbtbl set_bb;
 		struct nvme_nvm_erase_blk erase;
 	};
 };
@@ -187,6 +200,20 @@ struct nvme_nvm_id {
 	struct nvme_nvm_id_group groups[4];
 } __packed;
 
+struct nvme_nvm_bb_tbl {
+	__u8	tblid[4];
+	__le16	verid;
+	__le16	revid;
+	__le32	rvsd1;
+	__le32	tblks;
+	__le32	tfact;
+	__le32	tgrown;
+	__le32	tdresv;
+	__le32	thresv;
+	__le32	rsvd2[8];
+	__u8	blk[0];
+};
+
 /*
  * Check we didn't inadvertently grow the command struct
  */
@@ -195,12 +222,14 @@ static inline void _nvme_nvm_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
 }
 
 static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
@@ -322,43 +351,80 @@ out:
 	return ret;
 }
 
-static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
-				unsigned int nr_blocks,
-				nvm_bb_update_fn *update_bbtbl, void *priv)
+static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa,
+				int nr_blocks, nvm_bb_update_fn *update_bbtbl,
+				void *priv)
 {
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_command c = {};
-	void *bb_bitmap;
-	u16 bb_bitmap_size;
+	struct nvme_nvm_bb_tbl *bb_tbl;
+	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
 	int ret = 0;
 
 	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
 	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
-	c.get_bb.lbb = cpu_to_le32(lunid);
-	bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE;
-	bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
-	if (!bb_bitmap)
-		return -ENOMEM;
+	c.get_bb.spba = cpu_to_le64(ppa.ppa);
 
-	bitmap_zero(bb_bitmap, nr_blocks);
+	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
+	if (!bb_tbl)
+		return -ENOMEM;
 
-	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
-								bb_bitmap_size);
+	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_tbl, tblsz);
 	if (ret) {
 		dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
 		ret = -EIO;
 		goto out;
 	}
 
-	ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv);
+	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
+		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
+		dev_err(dev->dev, "bbt format mismatch\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (le16_to_cpu(bb_tbl->verid) != 1) {
+		ret = -EINVAL;
+		dev_err(dev->dev, "bbt version not supported\n");
+		goto out;
+	}
+
+	if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
+		ret = -EINVAL;
+		dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+					le32_to_cpu(bb_tbl->tblks), nr_blocks);
+		goto out;
+	}
+
+	ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
 	if (ret) {
 		ret = -EINTR;
 		goto out;
 	}
 
 out:
-	kfree(bb_bitmap);
+	kfree(bb_tbl);
+	return ret;
+}
+
+static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd,
+								int type)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_nvm_command c = {};
+	int ret = 0;
+
+	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
+	c.set_bb.nsid = cpu_to_le32(ns->ns_id);
+	c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
+	c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
+	c.set_bb.value = type;
+
+	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
+	if (ret)
+		dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
 	return ret;
 }
 
@@ -474,6 +540,7 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.get_l2p_tbl		= nvme_nvm_get_l2p_tbl,
 
 	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
+	.set_bb_tbl		= nvme_nvm_set_bb_tbl,
 
 	.submit_io		= nvme_nvm_submit_io,
 	.erase_block		= nvme_nvm_erase_block,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 32b5369e814e..9b3dc1bc9296 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -191,11 +191,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 struct nvm_block;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
 typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
 typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
 				nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
+typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
 typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
 typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
@@ -210,7 +210,7 @@ struct nvm_dev_ops {
 	nvm_id_fn		*identity;
 	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
 	nvm_op_bb_tbl_fn	*get_bb_tbl;
-	nvm_op_set_bb_fn	*set_bb;
+	nvm_op_set_bb_fn	*set_bb_tbl;
 
 	nvm_submit_io_fn	*submit_io;
 	nvm_erase_blk_fn	*erase_block;
-- 
cgit v1.2.3


From 12be5edf68e785dd5dc8665db5a88152b49c1fe8 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:39 +0100
Subject: lightnvm: expose mccap in identify command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mccap field is required for I/O command option support. It defines the
following flash access modes:

 * SLC mode
 * Erase/Program Suspension
 * Scramble On/Off
 * Encryption

It is slotted in between mpos and cpar, changing the offset for
cpar as well.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/lightnvm.c | 4 +++-
 include/linux/lightnvm.h     | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 60687ed68b5d..52b311cf694c 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -169,8 +169,9 @@ struct nvme_nvm_id_group {
 	__le32			tbet;
 	__le32			tbem;
 	__le32			mpos;
+	__le32			mccap;
 	__le16			cpar;
-	__u8			reserved[910];
+	__u8			reserved[906];
 } __packed;
 
 struct nvme_nvm_addr_format {
@@ -265,6 +266,7 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
 		dst->tbet = le32_to_cpu(src->tbet);
 		dst->tbem = le32_to_cpu(src->tbem);
 		dst->mpos = le32_to_cpu(src->mpos);
+		dst->mccap = le32_to_cpu(src->mccap);
 
 		dst->cpar = le16_to_cpu(src->cpar);
 	}
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 9b3dc1bc9296..2572856e2a89 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -74,6 +74,7 @@ struct nvm_id_group {
 	u32	tbet;
 	u32	tbem;
 	u32	mpos;
+	u32	mccap;
 	u16	cpar;
 	u8	res[913];
 } __packed;
-- 
cgit v1.2.3


From 73387e7bed260c89628fc6a4e3632b45be9776b0 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:40 +0100
Subject: lightnvm: remove unused attrs in nvm_id structs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nvm_id, nvm_id_group and nvm_addr_format data structures contain
reserved attributes. They are unused by media managers and targets.
Remove them.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2572856e2a89..e6ef8aaf533f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -58,7 +58,6 @@ enum {
 struct nvm_id_group {
 	u8	mtype;
 	u8	fmtype;
-	u16	res16;
 	u8	num_ch;
 	u8	num_lun;
 	u8	num_pln;
@@ -76,8 +75,7 @@ struct nvm_id_group {
 	u32	mpos;
 	u32	mccap;
 	u16	cpar;
-	u8	res[913];
-} __packed;
+};
 
 struct nvm_addr_format {
 	u8	ch_offset;
@@ -92,19 +90,16 @@ struct nvm_addr_format {
 	u8	pg_len;
 	u8	sect_offset;
 	u8	sect_len;
-	u8	res[4];
 };
 
 struct nvm_id {
 	u8	ver_id;
 	u8	vmnt;
 	u8	cgrps;
-	u8	res[5];
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
 	u8	ppat;
-	u8	resv[224];
 	struct nvm_id_group groups[4];
 } __packed;
 
-- 
cgit v1.2.3


From 7386af270c72be65c7cb2ba4ad0d4e70dc373106 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:44 +0100
Subject: lightnvm: remove linear and device addr modes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The linear and device specific address modes can be replaced with a
simple offset and bit length conversion that is generic across all
devices.

This both simplifies the specification and removes the special case for
qemu nvme, which previously relied on the linear address mapping.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c      |   3 +-
 drivers/lightnvm/gennvm.c    |  12 ++--
 drivers/lightnvm/rrpc.c      |  32 ++++++++-
 drivers/nvme/host/lightnvm.c |   3 +-
 include/linux/lightnvm.h     | 154 ++++++++++---------------------------------
 5 files changed, 73 insertions(+), 131 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 899f6b9a9f68..790b1d7a8d43 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -174,8 +174,7 @@ static int nvm_core_init(struct nvm_dev *dev)
 	dev->sec_size = grp->csecs;
 	dev->oob_size = grp->sos;
 	dev->sec_per_pg = grp->fpg_sz / grp->csecs;
-	dev->addr_mode = id->ppat;
-	dev->addr_format = id->ppaf;
+	memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
 
 	dev->plane_mode = NVM_PLANE_SINGLE;
 	dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 8cfc0114ff13..c0d0eb2357a8 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -73,7 +73,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
 	struct nvm_block *blk;
 	int i;
 
-	ppa = addr_to_generic_mode(gn->dev, ppa);
+	ppa = dev_to_generic_addr(gn->dev, ppa);
 	lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun];
 
 	for (i = 0; i < nr_blocks; i++) {
@@ -179,7 +179,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 			ppa.ppa = 0;
 			ppa.g.ch = lun->vlun.chnl_id;
 			ppa.g.lun = lun->vlun.id;
-			ppa = generic_to_addr_mode(dev, ppa);
+			ppa = generic_to_dev_addr(dev, ppa);
 
 			ret = dev->ops->get_bb_tbl(dev->q, ppa,
 						dev->blks_per_lun,
@@ -304,10 +304,10 @@ static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	if (rqd->nr_pages > 1) {
 		for (i = 0; i < rqd->nr_pages; i++)
-			rqd->ppa_list[i] = addr_to_generic_mode(dev,
+			rqd->ppa_list[i] = dev_to_generic_addr(dev,
 							rqd->ppa_list[i]);
 	} else {
-		rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
+		rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
 	}
 }
 
@@ -317,10 +317,10 @@ static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	if (rqd->nr_pages > 1) {
 		for (i = 0; i < rqd->nr_pages; i++)
-			rqd->ppa_list[i] = generic_to_addr_mode(dev,
+			rqd->ppa_list[i] = generic_to_dev_addr(dev,
 							rqd->ppa_list[i]);
 	} else {
-		rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
+		rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
 	}
 }
 
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 7ba64c87ba1c..75e59c3a3f96 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -123,12 +123,42 @@ static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
 	return blk->id * rrpc->dev->pgs_per_blk;
 }
 
+static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+							struct ppa_addr r)
+{
+	struct ppa_addr l;
+	int secs, pgs, blks, luns;
+	sector_t ppa = r.ppa;
+
+	l.ppa = 0;
+
+	div_u64_rem(ppa, dev->sec_per_pg, &secs);
+	l.g.sec = secs;
+
+	sector_div(ppa, dev->sec_per_pg);
+	div_u64_rem(ppa, dev->sec_per_blk, &pgs);
+	l.g.pg = pgs;
+
+	sector_div(ppa, dev->pgs_per_blk);
+	div_u64_rem(ppa, dev->blks_per_lun, &blks);
+	l.g.blk = blks;
+
+	sector_div(ppa, dev->blks_per_lun);
+	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+	l.g.lun = luns;
+
+	sector_div(ppa, dev->luns_per_chnl);
+	l.g.ch = ppa;
+
+	return l;
+}
+
 static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
 {
 	struct ppa_addr paddr;
 
 	paddr.ppa = addr;
-	return __linear_to_generic_addr(dev, paddr);
+	return linear_to_generic_addr(dev, paddr);
 }
 
 /* requires lun->lock taken */
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 52b311cf694c..9069be811f82 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -198,8 +198,7 @@ struct nvme_nvm_id {
 	__le32			cap;
 	__le32			dom;
 	struct nvme_nvm_addr_format ppaf;
-	__u8			ppat;
-	__u8			resv[223];
+	__u8			resv[224];
 	struct nvme_nvm_id_group groups[4];
 } __packed;
 
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e6ef8aaf533f..cbe288acb1de 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -99,7 +99,6 @@ struct nvm_id {
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
-	u8	ppat;
 	struct nvm_id_group groups[4];
 } __packed;
 
@@ -119,39 +118,28 @@ struct nvm_tgt_instance {
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
 
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS  (6)
-#define NVM_PG_BITS  (16)
 #define NVM_BLK_BITS (16)
-#define NVM_LUN_BITS (10)
+#define NVM_PG_BITS  (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS  (8)
+#define NVM_LUN_BITS (8)
 #define NVM_CH_BITS  (8)
 
 struct ppa_addr {
+	/* Generic structure for all addresses */
 	union {
-		/* Channel-based PPA format in nand 4x2x2x2x8x10 */
-		struct {
-			u64 ch		: 4;
-			u64 sec		: 2; /* 4 sectors per page */
-			u64 pl		: 2; /* 4 planes per LUN */
-			u64 lun		: 2; /* 4 LUNs per channel */
-			u64 pg		: 8; /* 256 pages per block */
-			u64 blk		: 10;/* 1024 blocks per plane */
-			u64 resved		: 36;
-		} chnl;
-
-		/* Generic structure for all addresses */
 		struct {
+			u64 blk		: NVM_BLK_BITS;
+			u64 pg		: NVM_PG_BITS;
 			u64 sec		: NVM_SEC_BITS;
 			u64 pl		: NVM_PL_BITS;
-			u64 pg		: NVM_PG_BITS;
-			u64 blk		: NVM_BLK_BITS;
 			u64 lun		: NVM_LUN_BITS;
 			u64 ch		: NVM_CH_BITS;
 		} g;
 
 		u64 ppa;
 	};
-} __packed;
+};
 
 struct nvm_rq {
 	struct nvm_tgt_instance *ins;
@@ -259,8 +247,7 @@ struct nvm_dev {
 	int blks_per_lun;
 	int sec_size;
 	int oob_size;
-	int addr_mode;
-	struct nvm_addr_format addr_format;
+	struct nvm_addr_format ppaf;
 
 	/* Calculated/Cached values. These do not reflect the actual usable
 	 * blocks at run-time.
@@ -286,118 +273,45 @@ struct nvm_dev {
 	char name[DISK_NAME_LEN];
 };
 
-/* fallback conversion */
-static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-
-	l.ppa = r.g.sec +
-		r.g.pg  * dev->sec_per_pg +
-		r.g.blk * (dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.lun * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.ch * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->luns_per_chnl *
-				dev->sec_per_pg);
-
-	return l;
-}
-
-/* fallback conversion */
-static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-	int secs, pgs, blks, luns;
-	sector_t ppa = r.ppa;
-
-	l.ppa = 0;
-
-	div_u64_rem(ppa, dev->sec_per_pg, &secs);
-	l.g.sec = secs;
-
-	sector_div(ppa, dev->sec_per_pg);
-	div_u64_rem(ppa, dev->sec_per_blk, &pgs);
-	l.g.pg = pgs;
-
-	sector_div(ppa, dev->pgs_per_blk);
-	div_u64_rem(ppa, dev->blks_per_lun, &blks);
-	l.g.blk = blks;
-
-	sector_div(ppa, dev->blks_per_lun);
-	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
-	l.g.lun = luns;
-
-	sector_div(ppa, dev->luns_per_chnl);
-	l.g.ch = ppa;
-
-	return l;
-}
-
-static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = 0;
-
-	l.chnl.sec = r.g.sec;
-	l.chnl.pl = r.g.pl;
-	l.chnl.pg = r.g.pg;
-	l.chnl.blk = r.g.blk;
-	l.chnl.lun = r.g.lun;
-	l.chnl.ch = r.g.ch;
+	l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
+	l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
+	l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
+	l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
+	l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
+	l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
 
 	return l;
 }
 
-static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = 0;
-
-	l.g.sec = r.chnl.sec;
-	l.g.pl = r.chnl.pl;
-	l.g.pg = r.chnl.pg;
-	l.g.blk = r.chnl.blk;
-	l.g.lun = r.chnl.lun;
-	l.g.ch = r.chnl.ch;
+	/*
+	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
+	 */
+	l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
+					(((1 << dev->ppaf.blk_len) - 1));
+	l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
+					(((1 << dev->ppaf.pg_len) - 1));
+	l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
+					(((1 << dev->ppaf.sect_len) - 1));
+	l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
+					(((1 << dev->ppaf.pln_len) - 1));
+	l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
+					(((1 << dev->ppaf.lun_len) - 1));
+	l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
+					(((1 << dev->ppaf.ch_len) - 1));
 
 	return l;
 }
 
-static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __linear_to_generic_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __chnl_to_generic_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
-static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __generic_to_linear_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __generic_to_chnl_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
 static inline int ppa_empty(struct ppa_addr ppa_addr)
 {
 	return (ppa_addr.ppa == ADDR_EMPTY);
-- 
cgit v1.2.3


From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 16 Nov 2015 15:43:45 -0500
Subject: vlan: Do not put vlan headers back on bridge and macvlan ports

When a vlan is configured with REORDER_HEADER set to 0, the vlan
header is put back into the packet and makes it appear that
the vlan header is still there even after it's been processed.
This poses a problem for bridge and macvlan ports.  The packets
passed to those devices may be forwarded and at the time of the
forward, vlan headers end up being unexpectedly present.

With the patch, we make sure that we do not put the vlan header
back (when REORDER_HEADER is 0) if a bridge or macvlan has
been configured on top of the vlan device.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 net/8021q/vlan_core.c     | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc221b967687..67bfac1abfc1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_EBRIDGE;
 }
 
+static inline bool netif_is_bridge_port(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_BRIDGE_PORT;
+}
+
 static inline bool netif_is_ovs_master(const struct net_device *dev)
 {
 	return dev->priv_flags & IFF_OPENVSWITCH;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+	    !netif_is_macvlan_port(vlan_dev) &&
+	    !netif_is_bridge_port(vlan_dev)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
-- 
cgit v1.2.3


From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 16 Nov 2015 23:34:41 +0100
Subject: phy: marvell: Add support for 88E1540 PHY

The 88E1540 can be found embedded in the Marvell 88E6352 switch.  It
is compatible with the 88E1510, so add support for it, using the
88E1510 specific functions.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c   | 16 ++++++++++++++++
 include/linux/marvell_phy.h |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5de8d5827536..0240552b50f3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1153,6 +1153,21 @@ static struct phy_driver marvell_drivers[] = {
 		.suspend = &genphy_suspend,
 		.driver = { .owner = THIS_MODULE },
 	},
+	{
+		.phy_id = MARVELL_PHY_ID_88E1540,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88E1540",
+		.features = PHY_GBIT_FEATURES,
+		.flags = PHY_HAS_INTERRUPT,
+		.config_aneg = &m88e1510_config_aneg,
+		.read_status = &marvell_read_status,
+		.ack_interrupt = &marvell_ack_interrupt,
+		.config_intr = &marvell_config_intr,
+		.did_interrupt = &m88e1121_did_interrupt,
+		.resume = &genphy_resume,
+		.suspend = &genphy_suspend,
+		.driver = { .owner = THIS_MODULE },
+	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E3016,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -1186,6 +1201,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
 	{ MARVELL_PHY_ID_88E1318S, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1116R, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1510, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
 	{ }
 };
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index e6982ac3200d..a57f0dfb6db7 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -16,6 +16,7 @@
 #define MARVELL_PHY_ID_88E1318S		0x01410e90
 #define MARVELL_PHY_ID_88E1116R		0x01410e40
 #define MARVELL_PHY_ID_88E1510		0x01410dd0
+#define MARVELL_PHY_ID_88E1540		0x01410eb0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 
 /* struct phy_device dev_flags definitions */
-- 
cgit v1.2.3


From db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 5 Nov 2015 09:03:50 +0100
Subject: KVM: Provide function for VCPU lookup by id

Let's provide a function to look up a VCPU by id.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[split patch from refactoring patch]
---
 include/linux/kvm_host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5706a2108f0a..c923350ca20a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 	     idx++)
 
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (vcpu->vcpu_id == id)
+			return vcpu;
+	return NULL;
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
-- 
cgit v1.2.3


From 851df3dc11136fde86ebd78ee7527cb43c7cd349 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Nov 2015 22:34:58 +0100
Subject: scpi: hide get_scpi_ops in module from built-in code

The scpi_clock driver can be built-in when CONFIG_COMPILE_TEST
is set even when ARM_SCPI_PROTOCOL is a loadable module, and
that results in a link error:

drivers/built-in.o: In function `scpi_clocks_probe':
(.text+0x14453c): undefined reference to `get_scpi_ops'

Using #if IS_REACHABLE() around the get_scpi_ops() declaration
makes it build successfully in this case for compile-testing,
but the effect is the same as when ARM_SCPI_PROTOCOL is
disabled, as the code will not be used.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
---
 include/linux/scpi_protocol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index 80af3cd35ae4..72ce932c69b2 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -71,7 +71,7 @@ struct scpi_ops {
 	int (*sensor_get_value)(u16, u32 *);
 };
 
-#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
+#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL)
 struct scpi_ops *get_scpi_ops(void);
 #else
 static inline struct scpi_ops *get_scpi_ops(void) { return NULL; }
-- 
cgit v1.2.3


From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 19 Nov 2015 13:29:28 -0800
Subject: block: protect rw_page against device teardown

Fix use after free crashes like the following:

 general protection fault: 0000 [#1] SMP
 Call Trace:
  [<ffffffffa0050216>] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem]
  [<ffffffffa0050ba2>] pmem_rw_page+0x42/0x80 [nd_pmem]
  [<ffffffff8128fd90>] bdev_read_page+0x50/0x60
  [<ffffffff812972f0>] do_mpage_readpage+0x510/0x770
  [<ffffffff8128fd20>] ? I_BDEV+0x20/0x20
  [<ffffffff811d86dc>] ? lru_cache_add+0x1c/0x50
  [<ffffffff81297657>] mpage_readpages+0x107/0x170
  [<ffffffff8128fd20>] ? I_BDEV+0x20/0x20
  [<ffffffff8128fd20>] ? I_BDEV+0x20/0x20
  [<ffffffff8129058d>] blkdev_readpages+0x1d/0x20
  [<ffffffff811d615f>] __do_page_cache_readahead+0x28f/0x310
  [<ffffffff811d6039>] ? __do_page_cache_readahead+0x169/0x310
  [<ffffffff811c5abd>] ? pagecache_get_page+0x2d/0x1d0
  [<ffffffff811c76f6>] filemap_fault+0x396/0x530
  [<ffffffff811f816e>] __do_fault+0x4e/0xf0
  [<ffffffff811fce7d>] handle_mm_fault+0x11bd/0x1b50

Cc: <stable@vger.kernel.org>
Cc: Jens Axboe <axboe@fb.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Reported-by: kbuild test robot <lkp@intel.com>
Acked-by: Matthew Wilcox <willy@linux.intel.com>
[willy: symmetry fixups]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 block/blk.h            |  2 --
 fs/block_dev.c         | 18 ++++++++++++++++--
 include/linux/blkdev.h |  2 ++
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index da722eb786df..c43926d3d74d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -72,8 +72,6 @@ void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 bool __blk_end_bidi_request(struct request *rq, int error,
 			    unsigned int nr_bytes, unsigned int bidi_bytes);
-int blk_queue_enter(struct request_queue *q, gfp_t gfp);
-void blk_queue_exit(struct request_queue *q);
 void blk_freeze_queue(struct request_queue *q);
 
 static inline void blk_queue_enter_live(struct request_queue *q)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bb0dfb1c7af1..c25639e907bd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 			struct page *page)
 {
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	int result = -EOPNOTSUPP;
+
 	if (!ops->rw_page || bdev_get_integrity(bdev))
-		return -EOPNOTSUPP;
-	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+		return result;
+
+	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	if (result)
+		return result;
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+	blk_queue_exit(bdev->bd_queue);
+	return result;
 }
 EXPORT_SYMBOL_GPL(bdev_read_page);
 
@@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 	int result;
 	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
+
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
+	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	if (result)
+		return result;
+
 	set_page_writeback(page);
 	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
 	if (result)
 		end_page_writeback(page);
 	else
 		unlock_page(page);
+	blk_queue_exit(bdev->bd_queue);
 	return result;
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3fe27f8d91f0..c0d2b7927c1f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
+extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+extern void blk_queue_exit(struct request_queue *q);
 extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
-- 
cgit v1.2.3


From b466c1dd73d5303a313fb0c962e4eb5879bc1336 Mon Sep 17 00:00:00 2001
From: Simon Wood <simon@mungewell.org>
Date: Thu, 19 Nov 2015 16:42:14 -0700
Subject: HID: Add vendor specific usage pages for Logitech G920

The Logitech G920 uses a couple of vendor specific usage pages,
which results in an incorrect number of axes/buttons being detected.

This patch adds these pages to the 'ignore' list.

Reported-by: Elias Vanderstuyft <elias.vds@gmail.com>
Signed-off-by: Simon Wood <simon@mungewell.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 4 ++++
 include/linux/hid.h     | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 2ba6bf69b7d0..f4eeb6bcb9ac 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -960,6 +960,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		goto ignore;
 
 	case HID_UP_LOGIVENDOR:
+		/* intentional fallback */
+	case HID_UP_LOGIVENDOR2:
+		/* intentional fallback */
+	case HID_UP_LOGIVENDOR3:
 		goto ignore;
 
 	case HID_UP_PID:
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 251a1d382e23..a6d7a3fc2cb3 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -168,6 +168,8 @@ struct hid_item {
 #define HID_UP_MSVENDOR		0xff000000
 #define HID_UP_CUSTOM		0x00ff0000
 #define HID_UP_LOGIVENDOR	0xffbc0000
+#define HID_UP_LOGIVENDOR2   0xff090000
+#define HID_UP_LOGIVENDOR3   0xff430000
 #define HID_UP_LNVENDOR		0xffa00000
 #define HID_UP_SENSOR		0x00200000
 
-- 
cgit v1.2.3


From 0b59733b95f9d7af6bee6e6a4d0d444eb694c514 Mon Sep 17 00:00:00 2001
From: Javier Gonzalez <javier@javigon.com>
Date: Fri, 20 Nov 2015 13:47:56 +0100
Subject: lightnvm: keep track of block counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maintain the number of in-use blocks, free blocks, and bad blocks on a
per-lun basis. This allows the upper layers to get information about
the state of each lun.

Also, account for blocks reserved to the device on the free block count.
nr_free_blocks now matches the actual number of blocks on the free list
when the device is booted.

Signed-off-by: Javier Gonzalez <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/gennvm.c | 14 +++++++++++++-
 include/linux/lightnvm.h  |  2 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index c0d0eb2357a8..43c01e0af887 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -60,6 +60,8 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
 		lun->vlun.lun_id = i % dev->luns_per_chnl;
 		lun->vlun.chnl_id = i / dev->luns_per_chnl;
 		lun->vlun.nr_free_blocks = dev->blks_per_lun;
+		lun->vlun.nr_inuse_blocks = 0;
+		lun->vlun.nr_bad_blocks = 0;
 	}
 	return 0;
 }
@@ -87,6 +89,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
 		}
 
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
 	}
 
 	return 0;
@@ -139,6 +142,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 			list_move_tail(&blk->list, &lun->used_list);
 			blk->type = 1;
 			lun->vlun.nr_free_blocks--;
+			lun->vlun.nr_inuse_blocks++;
 		}
 	}
 
@@ -167,8 +171,10 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 			block->id = cur_block_id++;
 
 			/* First block is reserved for device */
-			if (unlikely(lun_iter == 0 && blk_iter == 0))
+			if (unlikely(lun_iter == 0 && blk_iter == 0)) {
+				lun->vlun.nr_free_blocks--;
 				continue;
+			}
 
 			list_add_tail(&block->list, &lun->free_list);
 		}
@@ -266,6 +272,7 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
 	blk->type = 1;
 
 	lun->vlun.nr_free_blocks--;
+	lun->vlun.nr_inuse_blocks++;
 
 	spin_unlock(&vlun->lock);
 out:
@@ -283,16 +290,21 @@ static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
 	case 1:
 		list_move_tail(&blk->list, &lun->free_list);
 		lun->vlun.nr_free_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 		blk->type = 0;
 		break;
 	case 2:
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		pr_err("gennvm: erroneous block type (%lu -> %u)\n",
 							blk->id, blk->type);
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 	}
 
 	spin_unlock(&vlun->lock);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index cbe288acb1de..831a20cf070c 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -213,7 +213,9 @@ struct nvm_lun {
 	int lun_id;
 	int chnl_id;
 
+	unsigned int nr_inuse_blocks;	/* Number of used blocks */
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
+	unsigned int nr_bad_blocks;	/* Number of bad blocks */
 	struct nvm_block *blocks;
 
 	spinlock_t lock;
-- 
cgit v1.2.3


From 2fde0e482db2b43bb4ed0e9aebfbe78ebcbbf5a6 Mon Sep 17 00:00:00 2001
From: Javier Gonzalez <javier@javigon.com>
Date: Fri, 20 Nov 2015 13:47:57 +0100
Subject: lightnvm: add free and bad lun info to show luns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add free block, used block, and bad block information to the show debug
interface. This information is used to debug how targets track blocks.

Also, change debug function name to make it more generic.

Signed-off-by: Javier Gonzalez <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/lightnvm/core.c   |  2 +-
 drivers/lightnvm/gennvm.c | 19 ++++++++++++++-----
 include/linux/lightnvm.h  |  4 ++--
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index f61d325fd978..5178645ac42b 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -544,7 +544,7 @@ static int nvm_configure_show(const char *val)
 	if (!dev->mt)
 		return 0;
 
-	dev->mt->free_blocks_print(dev);
+	dev->mt->lun_info_print(dev);
 
 	return 0;
 }
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 43c01e0af887..e20e74ec6b91 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -464,15 +464,24 @@ static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
 	return &gn->luns[lunid].vlun;
 }
 
-static void gennvm_free_blocks_print(struct nvm_dev *dev)
+static void gennvm_lun_info_print(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn = dev->mp;
 	struct gen_lun *lun;
 	unsigned int i;
 
-	gennvm_for_each_lun(gn, lun, i)
-		pr_info("%s: lun%8u\t%u\n",
-					dev->name, i, lun->vlun.nr_free_blocks);
+
+	gennvm_for_each_lun(gn, lun, i) {
+		spin_lock(&lun->vlun.lock);
+
+		pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+				dev->name, i,
+				lun->vlun.nr_free_blocks,
+				lun->vlun.nr_inuse_blocks,
+				lun->vlun.nr_bad_blocks);
+
+		spin_unlock(&lun->vlun.lock);
+	}
 }
 
 static struct nvmm_type gennvm = {
@@ -490,7 +499,7 @@ static struct nvmm_type gennvm = {
 	.erase_blk	= gennvm_erase_blk,
 
 	.get_lun	= gennvm_get_lun,
-	.free_blocks_print = gennvm_free_blocks_print,
+	.lun_info_print = gennvm_lun_info_print,
 };
 
 static int __init gennvm_module_init(void)
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 831a20cf070c..3db5552b17d5 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -380,7 +380,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
+typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
 struct nvmm_type {
 	const char *name;
@@ -404,7 +404,7 @@ struct nvmm_type {
 	nvmm_get_lun_fn *get_lun;
 
 	/* Statistics */
-	nvmm_free_blocks_print_fn *free_blocks_print;
+	nvmm_lun_info_print_fn *lun_info_print;
 	struct list_head list;
 };
 
-- 
cgit v1.2.3


From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 20 Nov 2015 15:56:48 -0800
Subject: slab.h: sprinkle __assume_aligned attributes

The various allocators return aligned memory.  Telling the compiler that
allows it to generate better code in many cases, for example when the
return value is immediately passed to memset().

Some code does become larger, but at least we win twice as much as we lose:

$ scripts/bloat-o-meter /tmp/vmlinux vmlinux
add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145)

An example of the different (and smaller) code can be seen in mm_alloc(). Before:

:       48 8d 78 08             lea    0x8(%rax),%rdi
:       48 89 c1                mov    %rax,%rcx
:       48 89 c2                mov    %rax,%rdx
:       48 c7 00 00 00 00 00    movq   $0x0,(%rax)
:       48 c7 80 48 03 00 00    movq   $0x0,0x348(%rax)
:       00 00 00 00
:       31 c0                   xor    %eax,%eax
:       48 83 e7 f8             and    $0xfffffffffffffff8,%rdi
:       48 29 f9                sub    %rdi,%rcx
:       81 c1 50 03 00 00       add    $0x350,%ecx
:       c1 e9 03                shr    $0x3,%ecx
:       f3 48 ab                rep stos %rax,%es:(%rdi)

After:

:       48 89 c2                mov    %rax,%rdx
:       b9 6a 00 00 00          mov    $0x6a,%ecx
:       31 c0                   xor    %eax,%eax
:       48 89 d7                mov    %rdx,%rdi
:       f3 48 ab                rep stos %rax,%es:(%rdi)

So gcc's strategy is to do two possibly (but not really, of course)
unaligned stores to the first and last word, then do an aligned rep stos
covering the middle part with a little overlap.  Maybe arches which do not
allow unaligned stores gain even more.

I don't know if gcc can actually make use of alignments greater than 8 for
anything, so one could probably drop the __assume_xyz_alignment macros and
just use __assume_aligned(8).

The increases in code size are mostly caused by gcc deciding to
opencode strlen() using the check-four-bytes-at-a-time trick when it
knows the buffer is sufficiently aligned (one function grew by 200
bytes). Now it turns out that many of these strlen() calls showing up
were in fact redundant, and they're gone from -next. Applying the two
patches to next-20151001 bloat-o-meter instead says

add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896)

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7c82e3b307a3..96940772bb92 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -157,6 +157,24 @@ size_t ksize(const void *);
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
 #endif
 
+/*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
+ * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
+ * aligned pointers.
+ */
+#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
+#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
+#define __assume_page_alignment __assume_aligned(PAGE_SIZE)
+
 /*
  * Kmalloc array related definitions
  */
@@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size)
 }
 #endif /* !CONFIG_SLOB */
 
-void *__kmalloc(size_t size, gfp_t flags);
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment;
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment;
 void kmem_cache_free(struct kmem_cache *, void *);
 
 /*
@@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
 bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node);
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment;
 #else
 static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
@@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
 #endif
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
+extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment;
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 					   gfp_t gfpflags,
-					   int node, size_t size);
+					   int node, size_t size) __assume_slab_alignment;
 #else
 static __always_inline void *
 kmem_cache_alloc_node_trace(struct kmem_cache *s,
@@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 }
 #endif /* CONFIG_TRACING */
 
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
 
 #ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
 #else
 static __always_inline void *
 kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
@@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 	return __kmalloc_node(size, flags, node);
 }
 
-/*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
- * Intended for arches that get misalignment faults even for 64 bit integer
- * aligned buffers.
- */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
 struct memcg_cache_array {
 	struct rcu_head rcu;
 	struct kmem_cache *entries[0];
-- 
cgit v1.2.3


From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@intel.com>
Date: Fri, 20 Nov 2015 15:56:53 -0800
Subject: configfs: allow dynamic group creation

This patchset introduces IIO software triggers, offers a way of configuring
them via configfs and adds the IIO hrtimer based interrupt source to be used
with software triggers.

The architecture is now split in 3 parts, to remove all IIO trigger specific
parts from IIO configfs core:

(1) IIO configfs - creates the root of the IIO configfs subsys.
(2) IIO software triggers - software trigger implementation, dynamically
    creating /config/iio/triggers group.
(3) IIO hrtimer trigger - is the first interrupt source for software triggers
    (with sysfs to follow). Each trigger type can implement its own set of
    attributes.

Lockdep seems to be happy with the locking in configfs patch.

This patch (of 5):

We don't want to hardcode default groups at subsystem
creation time. We export:
	* configfs_register_group
	* configfs_unregister_group
to allow drivers to programmatically create/destroy groups
later, after module init time.

This is needed for IIO configfs support.

(akpm: the other 4 patches to be merged via the IIO tree)

Signed-off-by: Daniel Baluta <daniel.baluta@intel.com>
Suggested-by: Lars-Peter Clausen <lars@metafoo.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Joel Becker <jlbec@evilplan.org>
Cc: Hartmut Knaack <knaack.h@gmx.de>
Cc: Octavian Purdila <octavian.purdila@intel.com>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: Adriana Reus <adriana.reus@intel.com>
Cc: Cristina Opriceana <cristina.opriceana@gmail.com>
Cc: Peter Meerwald <pmeerw@pmeerw.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/configfs/dir.c        | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/configfs.h |  10 +++++
 2 files changed, 120 insertions(+)

(limited to 'include/linux')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c81ce7f200a6..a7a1b218f308 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = {
 	.iterate	= configfs_readdir,
 };
 
+/**
+ * configfs_register_group - creates a parent-child relation between two groups
+ * @parent_group:	parent group
+ * @group:		child group
+ *
+ * link groups, creates dentry for the child and attaches it to the
+ * parent dentry.
+ *
+ * Return: 0 on success, negative errno code on error
+ */
+int configfs_register_group(struct config_group *parent_group,
+			    struct config_group *group)
+{
+	struct configfs_subsystem *subsys = parent_group->cg_subsys;
+	struct dentry *parent;
+	int ret;
+
+	mutex_lock(&subsys->su_mutex);
+	link_group(parent_group, group);
+	mutex_unlock(&subsys->su_mutex);
+
+	parent = parent_group->cg_item.ci_dentry;
+
+	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+	ret = create_default_group(parent_group, group);
+	if (!ret) {
+		spin_lock(&configfs_dirent_lock);
+		configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+		spin_unlock(&configfs_dirent_lock);
+	}
+	mutex_unlock(&d_inode(parent)->i_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(configfs_register_group);
+
+/**
+ * configfs_unregister_group() - unregisters a child group from its parent
+ * @group: child group to be unregistered
+ *
+ * Undoes configfs_register_group()
+ */
+void configfs_unregister_group(struct config_group *group)
+{
+	struct configfs_subsystem *subsys = group->cg_subsys;
+	struct dentry *dentry = group->cg_item.ci_dentry;
+	struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
+
+	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+	spin_lock(&configfs_dirent_lock);
+	configfs_detach_prep(dentry, NULL);
+	spin_unlock(&configfs_dirent_lock);
+
+	configfs_detach_group(&group->cg_item);
+	d_inode(dentry)->i_flags |= S_DEAD;
+	dont_mount(dentry);
+	d_delete(dentry);
+	mutex_unlock(&d_inode(parent)->i_mutex);
+
+	dput(dentry);
+
+	mutex_lock(&subsys->su_mutex);
+	unlink_group(group);
+	mutex_unlock(&subsys->su_mutex);
+}
+EXPORT_SYMBOL(configfs_unregister_group);
+
+/**
+ * configfs_register_default_group() - allocates and registers a child group
+ * @parent_group:	parent group
+ * @name:		child group name
+ * @item_type:		child item type description
+ *
+ * boilerplate to allocate and register a child group with its parent. We need
+ * kzalloc'ed memory because child's default_group is initially empty.
+ *
+ * Return: allocated config group or ERR_PTR() on error
+ */
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+				const char *name,
+				struct config_item_type *item_type)
+{
+	int ret;
+	struct config_group *group;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+	config_group_init_type_name(group, name, item_type);
+
+	ret = configfs_register_group(parent_group, group);
+	if (ret) {
+		kfree(group);
+		return ERR_PTR(ret);
+	}
+	return group;
+}
+EXPORT_SYMBOL(configfs_register_default_group);
+
+/**
+ * configfs_unregister_default_group() - unregisters and frees a child group
+ * @group:	the group to act on
+ */
+void configfs_unregister_default_group(struct config_group *group)
+{
+	configfs_unregister_group(group);
+	kfree(group);
+}
+EXPORT_SYMBOL(configfs_unregister_default_group);
+
 int configfs_register_subsystem(struct configfs_subsystem *subsys)
 {
 	int err;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index a8a335b7fce0..758a029011b1 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro
 int configfs_register_subsystem(struct configfs_subsystem *subsys);
 void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
 
+int configfs_register_group(struct config_group *parent_group,
+			    struct config_group *group);
+void configfs_unregister_group(struct config_group *group);
+
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+				const char *name,
+				struct config_item_type *item_type);
+void configfs_unregister_default_group(struct config_group *group);
+
 /* These functions can sleep and can alloc with GFP_KERNEL */
 /* WARNING: These cannot be called underneath configfs callbacks!! */
 int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
-- 
cgit v1.2.3


From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Fri, 20 Nov 2015 15:57:21 -0800
Subject: kernel/signal.c: unexport sigsuspend()

sigsuspend() is nowhere used except in signal.c itself, so we can mark it
static and not pollute the global namespace.

But this patch is more than a boring cleanup patch, it fixes a real issue
on UserModeLinux.  UML has a special console driver to display ttys using
xterm, or other terminal emulators, on the host side.  Vegard reported
that sometimes UML is unable to spawn a xterm and he's facing the
following warning:

  WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0()

It turned out that this warning makes absolutely no sense as the UML
xterm code calls sigsuspend() on the host side, at least it tries.  But
as the kernel itself offers a sigsuspend() symbol, the linker chooses this
one instead of the glibc wrapper.  Interestingly, this code has appeared to
work all along, but always blocked signals on the wrong side.  Some recent
kernel change made the WARN_ON() trigger and uncovered the bug.

It is a wonderful example of how much works by chance on computers. :-)

Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()")
Signed-off-by: Richard Weinberger <richard@nod.at>
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Tested-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>	[3.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 1 -
 kernel/signal.c        | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index ab1e0392b5ac..92557bbce7e7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
-extern int sigsuspend(sigset_t *);
 
 struct sigaction {
 #ifndef __ARCH_HAS_IRIX_SIGACTION
diff --git a/kernel/signal.c b/kernel/signal.c
index c0b01fe24bbd..f3f1f7a972fd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause)
 
 #endif
 
-int sigsuspend(sigset_t *set)
+static int sigsuspend(sigset_t *set)
 {
 	current->saved_sigmask = current->blocked;
 	set_current_blocked(set);
-- 
cgit v1.2.3


From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Fri, 20 Nov 2015 15:57:32 -0800
Subject: mm: fix up sparse warning in gfpflags_allow_blocking

sparse says:

    include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types)
    include/linux/gfp.h:274:26:    expected bool
    include/linux/gfp.h:274:26:    got restricted gfp_t

...add a forced cast to silence the warning.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6523109e136d..8942af0813e3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
-	return gfp_flags & __GFP_DIRECT_RECLAIM;
+	return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
 #ifdef CONFIG_HIGHMEM
-- 
cgit v1.2.3


From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 8 Nov 2015 08:52:31 -0500
Subject: tty: audit: Fix audit source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The data to audit/record is in the 'from' buffer (ie., the input
read buffer).

Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode")
Cc: stable <stable@vger.kernel.org> # 4.1+
Cc: Miloslav Trmač <mitr@redhat.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c     | 2 +-
 drivers/tty/tty_audit.c | 2 +-
 include/linux/tty.h     | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 13844261cd5f..ed776149261e 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty,
 {
 	struct n_tty_data *ldata = tty->disc_data;
 
-	tty_audit_add_data(tty, to, n, ldata->icanon);
+	tty_audit_add_data(tty, from, n, ldata->icanon);
 	return copy_to_user(to, from, n);
 }
 
diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index 90ca082935f6..3d245cd3d8e6 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty,
  *
  *	Audit @data of @size from @tty, if necessary.
  */
-void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			size_t size, unsigned icanon)
 {
 	struct tty_audit_buf *buf;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5b04b0a5375b..5e31f1b99037 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
-extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+extern void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			       size_t size, unsigned icanon);
 extern void tty_audit_exit(void);
 extern void tty_audit_fork(struct signal_struct *sig);
@@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch);
 extern void tty_audit_push(struct tty_struct *tty);
 extern int tty_audit_push_current(void);
 #else
-static inline void tty_audit_add_data(struct tty_struct *tty,
-		unsigned char *data, size_t size, unsigned icanon)
+static inline void tty_audit_add_data(struct tty_struct *tty, const void *data,
+				      size_t size, unsigned icanon)
 {
 }
 static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
-- 
cgit v1.2.3


From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Fri, 20 Nov 2015 15:57:58 -0800
Subject: slab/slub: adjust kmem_cache_alloc_bulk API

Adjust kmem_cache_alloc_bulk API before we have any real users.

Adjust API to return type 'int' instead of previously type 'bool'.  This
is done to allow future extension of the bulk alloc API.

A future extension could be to allow SLUB to stop at a page boundary, when
specified by a flag, and then return the number of objects.

The advantage of this approach is that it would make it easier to make bulk
alloc run without local IRQs disabled, with an approach of cmpxchg "stealing"
the entire c->freelist or page->freelist.  To avoid overshooting we would
stop processing at a slab-page boundary.  Else we always end up returning
some objects at the cost of another cmpxchg.

To keep compatible with future users of this API linking against an older
kernel when using the new flag, we need to return the number of allocated
objects with this API change.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 +-
 mm/slab.c            | 2 +-
 mm/slab.h            | 2 +-
 mm/slab_common.c     | 6 +++---
 mm/slob.c            | 2 +-
 mm/slub.c            | 8 ++++----
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 96940772bb92..2037a861e367 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * Note that interrupts must be enabled when calling these functions.
  */
 void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
diff --git a/mm/slab.c b/mm/slab.c
index e0819fa96559..4765c97ce690 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3419,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slab.h b/mm/slab.h
index 27492eb678f7..7b6087197997 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
  * may be allocated or freed using these operations.
  */
 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_MEMCG_KMEM
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d88e97c10a2e..3c6a86b4ec25 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
 		kmem_cache_free(s, p[i]);
 }
 
-bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
+int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 								void **p)
 {
 	size_t i;
@@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 		void *x = p[i] = kmem_cache_alloc(s, flags);
 		if (!x) {
 			__kmem_cache_free_bulk(s, i, p);
-			return false;
+			return 0;
 		}
 	}
-	return true;
+	return i;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/slob.c b/mm/slob.c
index 0d7e5df74d1f..17e8f8cc7c53 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slub.c b/mm/slub.c
index 34847044dfe5..46997517406e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2909,8 +2909,8 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
 /* Note that interrupts must be enabled when calling this function. */
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
-			   void **p)
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+			  void **p)
 {
 	struct kmem_cache_cpu *c;
 	int i;
@@ -2959,12 +2959,12 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 
 	/* memcg and kmem_cache debug support */
 	slab_post_alloc_hook(s, flags, size, p);
-	return true;
+	return i;
 error:
 	local_irq_enable();
 	slab_post_alloc_hook(s, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
-	return false;
+	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
 
-- 
cgit v1.2.3


From c86b3de8c8b02d7e474fdc002c8df533b844524c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Nov 2015 17:48:52 +0100
Subject: thermal: fix thermal_zone_bind_cooling_device prototype

When the prototype for thermal_zone_bind_cooling_device
changed, the static inline wrapper function was left alone,
which in theory can cause build warnings:

I have seen this error in the past:
drivers/thermal/db8500_thermal.c: In function 'db8500_cdev_bind':
drivers/thermal/db8500_thermal.c:78:9: error: too many arguments to function 'thermal_zone_bind_cooling_device'
   ret = thermal_zone_bind_cooling_device(thermal, i, cdev,

while this one no longer shows up, there is no doubt that
the prototype is still wrong, so let's just fix it anyway.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 6cd9e9f629f1 ("thermal: of: fix cooling device weights in device tree")
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4014a59828fc..613c29bd6baf 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -438,7 +438,8 @@ static inline void thermal_zone_device_unregister(
 static inline int thermal_zone_bind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
 	struct thermal_cooling_device *cdev,
-	unsigned long upper, unsigned long lower)
+	unsigned long upper, unsigned long lower,
+	unsigned int weight)
 { return -ENODEV; }
 static inline int thermal_zone_unbind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
-- 
cgit v1.2.3


From 91ab4b4d16e6649fbbf65f303c0c4e20ed680bd1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Thu, 19 Nov 2015 14:30:26 -0500
Subject: nfs: use sliding delay when LAYOUTGET gets NFS4ERR_DELAY

When LAYOUTGET gets NFS4ERR_DELAY, we currently will wait 15s before
retrying the call. That is a _very_ long time, so add a timeout value to
struct nfs4_layoutget and pass nfs4_async_handle_error a pointer to it.
This allows the RPC engine to use a sliding delay window, instead of a
15s delay.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 2 +-
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 765a03559363..89818036f035 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7866,7 +7866,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			spin_unlock(&inode->i_lock);
 		goto out_restart;
 	}
-	if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)
 		goto out_restart;
 out:
 	dprintk("<-- %s\n", __func__);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 570d630f98ae..11bbae44f4cb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,6 +251,7 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
 	gfp_t gfp_flags;
+	long timeout;
 };
 
 struct nfs4_getdeviceinfo_args {
-- 
cgit v1.2.3


From fbc416ff86183e2203cdf975e2881d7c164b0271 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Nov 2015 12:12:21 +0100
Subject: arm64: fix building without CONFIG_UID16

As reported by Michal Simek, building an ARM64 kernel with CONFIG_UID16
disabled currently fails because the system call table still needs to
reference the individual function entry points that are provided by
kernel/sys_ni.c in this case, and the declarations are hidden inside
of #ifdef CONFIG_UID16:

arch/arm64/include/asm/unistd32.h:57:8: error: 'sys_lchown16' undeclared here (not in a function)
 __SYSCALL(__NR_lchown, sys_lchown16)

I believe this problem only exists on ARM64, because older architectures
tend to not need declarations when their system call table is built
in assembly code, while newer architectures tend to not need UID16
support. ARM64 only uses these system calls for compatibility with
32-bit ARM binaries.

This changes the CONFIG_UID16 check into CONFIG_HAVE_UID16, which is
set unconditionally on ARM64 with CONFIG_COMPAT, so we see the
declarations whenever we need them, but otherwise the behavior is
unchanged.

Fixes: af1839eb4bd4 ("Kconfig: clean up the long arch list for the UID16 config option")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/syscalls.h | 2 +-
 include/linux/types.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a156b82dd14c..c2b66a277e98 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename,
 asmlinkage long sys_lchown(const char __user *filename,
 				uid_t user, gid_t group);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 asmlinkage long sys_chown16(const char __user *filename,
 				old_uid_t user, old_gid_t group);
 asmlinkage long sys_lchown16(const char __user *filename,
diff --git a/include/linux/types.h b/include/linux/types.h
index 70d8500bddf1..70dd3dfde631 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -35,7 +35,7 @@ typedef __kernel_gid16_t        gid16_t;
 
 typedef unsigned long		uintptr_t;
 
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 /* This is defined by include/asm-{arch}/posix_types.h */
 typedef __kernel_old_uid_t	old_uid_t;
 typedef __kernel_old_gid_t	old_gid_t;
-- 
cgit v1.2.3


From 7c7a0e945349a3d0d497d7f32db6ed33d4031110 Mon Sep 17 00:00:00 2001
From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Date: Wed, 11 Nov 2015 09:12:25 +0800
Subject: ARM/PCI: Move align_resource function pointer to pci_host_bridge
 structure

Commit b3a72384fe29 ("ARM/PCI: Replace pci_sys_data->align_resource with
global function pointer") introduced an ARM-specific align_resource()
function pointer.  This is not portable to other arches and doesn't work
for platforms with two different PCIe host bridge controllers.

Move the function pointer to the pci_host_bridge structure so each host
bridge driver can specify its own align_resource() function.

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/kernel/bios32.c | 19 +++++++++++--------
 drivers/pci/pci.h        |  2 --
 include/linux/pci.h      |  9 +++++++++
 3 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 6551d28c27e6..066f7f9ba411 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -17,11 +17,6 @@
 #include <asm/mach/pci.h>
 
 static int debug_pci;
-static resource_size_t (*align_resource)(struct pci_dev *dev,
-		  const struct resource *res,
-		  resource_size_t start,
-		  resource_size_t size,
-		  resource_size_t align) = NULL;
 
 /*
  * We can't use pci_get_device() here since we are
@@ -461,7 +456,6 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 		sys->busnr   = busnr;
 		sys->swizzle = hw->swizzle;
 		sys->map_irq = hw->map_irq;
-		align_resource = hw->align_resource;
 		INIT_LIST_HEAD(&sys->resources);
 
 		if (hw->private_data)
@@ -470,6 +464,8 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 		ret = hw->setup(nr, sys);
 
 		if (ret > 0) {
+			struct pci_host_bridge *host_bridge;
+
 			ret = pcibios_init_resources(nr, sys);
 			if (ret)  {
 				kfree(sys);
@@ -491,6 +487,9 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 			busnr = sys->bus->busn_res.end + 1;
 
 			list_add(&sys->node, head);
+
+			host_bridge = pci_find_host_bridge(sys->bus);
+			host_bridge->align_resource = hw->align_resource;
 		} else {
 			kfree(sys);
 			if (ret < 0)
@@ -578,14 +577,18 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
+	struct pci_host_bridge *host_bridge;
 
 	if (res->flags & IORESOURCE_IO && start & 0x300)
 		start = (start + 0x3ff) & ~0x3ff;
 
 	start = (start + align - 1) & ~(align - 1);
 
-	if (align_resource)
-		return align_resource(dev, res, start, size, align);
+	host_bridge = pci_find_host_bridge(dev->bus);
+
+	if (host_bridge->align_resource)
+		return host_bridge->align_resource(dev, res,
+				start, size, align);
 
 	return start;
 }
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fd2f03fa53f3..d390fc1475ec 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -337,6 +337,4 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, int probe)
 }
 #endif
 
-struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
-
 #endif /* DRIVERS_PCI_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e828e7b4afec..6ae25aae88fd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -412,9 +412,18 @@ struct pci_host_bridge {
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
 	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
+	/* Resource alignment requirements */
+	resource_size_t (*align_resource)(struct pci_dev *dev,
+			const struct resource *res,
+			resource_size_t start,
+			resource_size_t size,
+			resource_size_t align);
 };
 
 #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
+
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
+
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 		     void (*release_fn)(struct pci_host_bridge *),
 		     void *release_data);
-- 
cgit v1.2.3


From 3a66d7dca186ebdef9b0bf55e216778fa598062c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 22 Oct 2015 16:02:14 -0700
Subject: kref: Remove kref_put_spinlock_irqsave()

The last user is gone. Hence remove this function.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/kref.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 484604d184be..e15828fd71f1 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,7 +19,6 @@
 #include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
 
 struct kref {
 	atomic_t refcount;
@@ -99,38 +98,6 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 	return kref_sub(kref, 1, release);
 }
 
-/**
- * kref_put_spinlock_irqsave - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
- * @lock: lock to take in release case
- *
- * Behaves identical to kref_put with one exception.  If the reference count
- * drops to zero, the lock will be taken atomically wrt dropping the reference
- * count.  The release function has to call spin_unlock() without _irqrestore.
- */
-static inline int kref_put_spinlock_irqsave(struct kref *kref,
-		void (*release)(struct kref *kref),
-		spinlock_t *lock)
-{
-	unsigned long flags;
-
-	WARN_ON(release == NULL);
-	if (atomic_add_unless(&kref->refcount, -1, 1))
-		return 0;
-	spin_lock_irqsave(lock, flags);
-	if (atomic_dec_and_test(&kref->refcount)) {
-		release(kref);
-		local_irq_restore(flags);
-		return 1;
-	}
-	spin_unlock_irqrestore(lock, flags);
-	return 0;
-}
-
 static inline int kref_put_mutex(struct kref *kref,
 				 void (*release)(struct kref *kref),
 				 struct mutex *lock)
-- 
cgit v1.2.3


From d8ce9bf5551bfea431893bdd0a943f24a5170828 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@163.com>
Date: Sun, 27 Dec 2015 17:25:20 +0800
Subject: HID: move to_hid_device() to hid.h

to_hid_device() macro is defined in both hid-lg4ff.c and
hid-logitech-hidpp.c. So I move it to include/linux/hid.h.

Signed-off-by: Geliang Tang <geliangtang@163.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg4ff.c          | 2 --
 drivers/hid/hid-logitech-hidpp.c | 2 --
 include/linux/hid.h              | 3 +++
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index fbddcb37ae98..3e160ff5f218 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -33,8 +33,6 @@
 #include "hid-lg4ff.h"
 #include "hid-ids.h"
 
-#define to_hid_device(pdev) container_of(pdev, struct hid_device, dev)
-
 #define LG4FF_MMODE_IS_MULTIMODE 0
 #define LG4FF_MMODE_SWITCHED 1
 #define LG4FF_MMODE_NOT_MULTIMODE 2
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index f2a481125522..bd2ab476c65e 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1310,8 +1310,6 @@ struct g920_private_data {
 	u16 range;
 };
 
-#define to_hid_device(pdev) container_of(pdev, struct hid_device, dev)
-
 static ssize_t g920_range_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a6d7a3fc2cb3..1472026367ed 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -565,6 +565,9 @@ struct hid_device {							/* device report descriptor */
 	wait_queue_head_t debug_wait;
 };
 
+#define to_hid_device(pdev) \
+	container_of(pdev, struct hid_device, dev)
+
 static inline void *hid_get_drvdata(struct hid_device *hdev)
 {
 	return dev_get_drvdata(&hdev->dev);
-- 
cgit v1.2.3


From ba91a96718d17160890e161f702db6e60747248a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@163.com>
Date: Sun, 27 Dec 2015 17:25:22 +0800
Subject: HID: add a new helper to_hid_driver()

Add a new helper to_hid_driver() and use it in hid-core.c.

Signed-off-by: Geliang Tang <geliangtang@163.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 7 +++----
 include/linux/hid.h    | 3 +++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index a6e24e00a37b..9d75205a511e 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2077,7 +2077,7 @@ struct hid_dynid {
 static ssize_t store_new_id(struct device_driver *drv, const char *buf,
 		size_t count)
 {
-	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+	struct hid_driver *hdrv = to_hid_driver(drv);
 	struct hid_dynid *dynid;
 	__u32 bus, vendor, product;
 	unsigned long driver_data = 0;
@@ -2139,7 +2139,7 @@ static const struct hid_device_id *hid_match_device(struct hid_device *hdev,
 
 static int hid_bus_match(struct device *dev, struct device_driver *drv)
 {
-	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+	struct hid_driver *hdrv = to_hid_driver(drv);
 	struct hid_device *hdev = to_hid_device(dev);
 
 	return hid_match_device(hdev, hdrv) != NULL;
@@ -2147,8 +2147,7 @@ static int hid_bus_match(struct device *dev, struct device_driver *drv)
 
 static int hid_device_probe(struct device *dev)
 {
-	struct hid_driver *hdrv = container_of(dev->driver,
-			struct hid_driver, driver);
+	struct hid_driver *hdrv = to_hid_driver(dev->driver);
 	struct hid_device *hdev = to_hid_device(dev);
 	const struct hid_device_id *id;
 	int ret = 0;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 1472026367ed..75b66eccc692 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -717,6 +717,9 @@ struct hid_driver {
 	struct device_driver driver;
 };
 
+#define to_hid_driver(pdrv) \
+	container_of(pdrv, struct hid_driver, driver)
+
 /**
  * hid_ll_driver - low level driver callbacks
  * @start: called on probe to start the device
-- 
cgit v1.2.3